---
title: "Health & Wealth in India DLHS-4/AHS"
author: "Lara Jung"
date: "3/23/2018"
output: pdf_document
---




# Data cleaning and merging
Load libraries.
```{r load packages}

library(tidyverse) 
library(dplyr) 
library(forcats) # for categorical variables (R for data science rec) --> see https://rdrr.io/cran/forcats/man/fct_unify.html
library(haven) # for read_dta/read_csv, Translate value labels into a new labelled() class, which preserves the original semantics and can easily be coerced to factors with as_factor()
library(ggplot2) #plot data
library(ggrepel)# to avoid text labels in ggplot from overlapping
library(modelr)# to use "add_predictions()" for adding a column of predicted vals to your dataset
library(broom)# to create tidy data from model output
#library(broom.mixed) #to extract coefficents (for mixed models)
#library(lme4) # for multi-level modeling
#library(lmerTest) # for p-values with the lmer command
library(srvyr) # survey package that also works with dplyr 
library(speedglm) #glm analyssis
library(arm)#bayesglm
library(readstata13) #read dta datasets
library(tableone) # Creates a table 1 (summary characteristics)



```


The code chunk below takes the DLHS and AHS datasets that were cleaned in Stata and contain sampling weights, cleans them further, drops irrelevant columns, and then appends DLHS and AHS to yield a dataset for analysis ("India_DLHS_AHS").

```{r Cleaning and merging data, eval=FALSE}
# 1. Import DLHS data ----
# Stata-cleaned DLHS file, converted to a tibble so the dplyr verbs below apply.
dlhs <- as_tibble(read.dta13("DLHS_w3_v2.dta"))

# 2.-3. Keep analysis columns, restrict the sample, harmonise names ----
# dplyr::select / dplyr::filter are namespaced explicitly throughout this file.
dlhs.cvd <- dlhs %>%
  dplyr::select(
    state, state_dist, female, educ, age1, rural, q_ai_nat_rurb,
    dropage,  # flag for age <18 (per the original note); used in the filter below
    pregnant,
    married,
    prim_key, # person identifier, copied to p_id below
    BMI,
    bpsyst_avg, bpsyst_frst, bpsyst_sec, htn_treated, htn_broad_avg,
    bpdiast_avg, bpdiast_frst, bpdiast_sec,
    glucose, diab_broad, diab_narrow, fasted,
    hv98,     # smoking variable, renamed to `smoke` below
    hhid, DLHS, sweight, psu, # variables needed for weighting
    ai_NAT_rurb,
    hv12,     # original education category variable
    hv06      # usual resident
    # hv90 and hv90a were commented out of the original selection and stay out
  ) %>%
  # Keep rows with dropage == 0 and pregnant == 0. The original note says
  # glucose was not measured in pregnant women and that DLHS has no missing
  # values in `pregnant`.
  dplyr::filter(dropage == 0, pregnant == 0) %>%
  dplyr::select(-dropage, -pregnant) %>%
  dplyr::rename(age = age1, smoke = hv98) %>%
  mutate(
    hhid = as.character(hhid),
    p_id = prim_key
  )

# 4. Import AHS data ----
ahs <- read.dta13("AHS_w3_v2.dta")
ahs <- as_tibble(ahs)

# 5. Drop irrelevant columns in AHS ----
ahs.cvd <- dplyr::select(
  ahs,
  state, state_dist, female, educ, age1, rural, q_ai_nat_rurb,
  dropage, pregnant,
  married,
  BMI,
  bpsyst_avg, bpsyst_frst, bpsyst_sec, htn_treated, htn_broad_avg,
  bpdiast_frst, bpdiast_avg, bpdiast_sec,
  glucose, diab_broad, diab_narrow,
  smoke, # AHS equivalent of hv98 in DLHS
  fid, DLHS, sweight, psu, sl_no, # weighting vars; fid = household id, sl_no feeds p_id
  ai_NAT_rurb,
  highest_qualification # original education category variable
)

# Person id = household id (fid) followed by the serial number left-padded with
# zeros to two characters.
# The original used sprintf("%02s", sl_no). The "0" flag is only defined for
# numeric conversions; with %s it zero-pads on some platforms and blank-pads
# on others, so p_id could come out as "<fid> 1" instead of "<fid>01"
# depending on the machine. str_pad() gives the same result everywhere.
ahs.cvd <- ahs.cvd %>%
  mutate(
    sl_no = stringr::str_pad(as.character(sl_no), width = 2, side = "left", pad = "0"),
    p_id = paste0(fid, sl_no)
  ) %>%
  dplyr::rename(age = age1) %>%
  mutate(hhid = as.character(fid)) # fid is the household ID in the AHS

# 6. Filter out those pregnant and <18 ----
ahs.cvd <- dplyr::filter(ahs.cvd, dropage == 0) # drops age <18 (per the original note)
# Unlike the DLHS filter, rows with a missing `pregnant` value are kept here.
ahs.cvd <- dplyr::filter(ahs.cvd, pregnant == 0 | is.na(pregnant))
ahs.cvd <- dplyr::select(ahs.cvd, -dropage, -pregnant)

# 7. Append AHS to DLHS ----
# Participant numbers recorded by the author:
#   DLHS unfiltered: 1568516   DLHS filtered: 1011694
#   AHS unfiltered:   891470   AHS filtered:   607227
#   India_DLHS_AHS:  1618921

# `state` is made a factor in both sources before stacking.
dlhs.cvd <- mutate(dlhs.cvd, state = as.factor(state))
ahs.cvd <- mutate(ahs.cvd, state = as.factor(state))

# bind_rows() matches columns by name and fills columns that exist in only one
# source with NA. Going by the column selections in steps 2 and 5, the
# DLHS-only columns are prim_key, fasted, hv12 and hv06, and the AHS-only
# columns are fid, sl_no and highest_qualification.
India_DLHS_AHS <- bind_rows(dlhs.cvd, ahs.cvd)

# check survey indicator
summary(as.factor(India_DLHS_AHS$DLHS))
# author's note: no missings, 1011694 1's (equals DLHS participants)

# 8. State and district variable corrections ----

# Add state names. Per the original note, the DLHS states came as numeric IDs
# without labels, while the AHS states are already stored by name.
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(state = {
    # numeric state IDs -> names
    id_to_name <- c(
      "2" = "Himachal Pradesh",
      "3" = "Punjab",
      "4" = "Chandigarh",
      "6" = "Haryana",
      "7" = "NCT of Delhi",
      "11" = "Sikkim",
      "12" = "Arunachal Pradesh",
      "13" = "Nagaland",
      "14" = "Manipur",
      "15" = "Mizoram",
      "16" = "Tripura",
      "17" = "Meghalaya",
      "19" = "West Bengal",
      "25" = "Daman and Diu",
      "27" = "Maharashtra",
      "28" = "Andhra Pradesh",
      "29" = "Karnataka",
      "30" = "Goa",
      "32" = "Kerala",
      "33" = "Tamil Nadu",
      "34" = "Puducherry",
      "35" = "Andaman and Nicobar",
      "36" = "Telangana"
    )
    # names that are passed through unchanged
    already_named <- c(
      "Uttarakhand", "Rajasthan", "Uttar Pradesh", "Bihar",
      "Assam", "Jharkhand", "Odisha", "Chhattisgarh"
    )
    state_chr <- as.character(state)
    ifelse(
      state_chr %in% names(id_to_name),
      unname(id_to_name[state_chr]),
      # NOTE(review): every other non-missing value is labelled
      # "Madhya Pradesh", so an unexpected code would be mislabelled silently.
      ifelse(is.na(state_chr) | state_chr %in% already_named,
             state_chr,
             "Madhya Pradesh")
    )
  }) %>%
  dplyr::rename(ex_state_ind = state) %>%
  mutate(ex_state_ind = as.factor(ex_state_ind))

# check
summary(India_DLHS_AHS$ex_state_ind)
# author's note: no missings

# Attach district names to the district IDs so the district-level GDP/capita
# data can be merged by name later on.
d_name_DLHS_4_AHS <- read_csv("DLHS_4_AHS_district_names.csv")
d_name_DLHS_4_AHS$d_id <- as.character(d_name_DLHS_4_AHS$d_id)

# A duplicated key in the lookup would duplicate every participant of that
# district in the join below, so stop early if that ever happens.
stopifnot(!anyDuplicated(d_name_DLHS_4_AHS$d_id))

India_DLHS_AHS$d_id <- India_DLHS_AHS$state_dist

# left_join instead of the original full_join: a full join also appends one
# all-NA "participant" row for every lookup district that has no survey
# respondents, which would inflate the sample. With a left join the number of
# survey rows is unchanged.
India_DLHS_AHS <- dplyr::left_join(India_DLHS_AHS, d_name_DLHS_4_AHS, by = "d_id")


# Harmonise district spellings. Names on the left are replaced by the names on
# the right; every other name, and NA, is left as it is.
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(ex_d_name_ind = {
    spelling_fixes <- c(
      "Sant Ravidas Nagar Bhadohi" = "Sant Ravidas Nagar (Bhadohi)",
      "South Twentyfour Parganas" = "South Twenty Four Parganas",
      "North Twentyfour Parganas" = "North Twenty Four Parganas",
      "Lahul and Spiti" = "Lahul & Spiti",
      "Thoothukkudi" = "Thoothukudi",
      "Mumbai Suburban" = "Mumbai (Suburban)",
      "Barabanki" = "Bara Banki",
      "East Nimar" = "Khandwa (East Nimar)",
      "Papumpare" = "Papum Pare",
      "Purab Medinipur" = "Purba Medinipur",
      "Ribhoi" = "Ri Bhoi",
      "Siddharthnagar" = "Siddharth Nagar",
      "The Nilgiris" = "Nilgiris",
      "Pakaur" = "Pakur",
      "Pudukottai" = "Pudukkottai",
      "Sivganga" = "Sivaganga",
      "North and Middle Andaman" = "North & Middle Andaman",
      "SAS Nagar (Mohali)" = "Sahibzada Ajit Singh Nagar",
      "Nellore" = "Sri Potti Sriramulu Nellore",
      "Cuddapah" = "Y.S.R.",
      "North Cachar Hills" = "Dima Hasao",
      "Nawanshahr" = "Shahid Bhagat Singh Nagar",
      "Mahrajganj" = "Maharajganj",
      "Marigaon" = "Morigaon",
      "Sibsaga" = "Sivasagar",
      "Pachim Medinipur" = "Paschim Medinipur",
      "Sonapur" = "Subarnapur",
      "Koriya" = "Korea (Koriya)",
      "Kawardha" = "Kabirdham",
      "Kanker" = "Uttar Bastar Kanker",
      "Dantewada" = "Dakshin Bastar Dantewada",
      "West Nimar" = "Khargone (West Nimar)",
      "Sibsagar" = "Sivasagar",
      "Ramanathpuram" = "Ramanathapuram",
      "Thirunelveli" = "Tirunelveli",
      "Pondicherry" = "Puducherry",
      "Kheri" = "Lakhimpur Kheri",
      "Senapati" = "Senapati (DHS:Excluding 3 Sub-Divisions)",
      "Senapati (Excluding 3 Sub-Divisions)" = "Senapati (DHS:Excluding 3 Sub-Divisions)",
      "Chikkballapur" = "Chikkaballapura"
    )
    ifelse(
      ex_d_name_ind %in% names(spelling_fixes),
      unname(spelling_fixes[as.character(ex_d_name_ind)]),
      ex_d_name_ind
    )
  })
 

# Give districts that share a name across states (and a few others) a fixed
# name based on their district id. Rows with a missing d_id get NA; rows whose
# d_id is not listed keep their current name.
# NOTE(review): the "Dheli" spellings are kept verbatim. ex_d_name_ind is the
# join key for the PCI file later on, which presumably uses the same
# spellings; confirm before correcting them.
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(ex_d_name_ind = {
    name_by_id <- c(
      "0206" = "Hamirpur (Himachal Pradesh)",
      "0938" = "Hamirpur (Uttar Pradesh)",
      "2207" = "Bilaspur (Chhattisgarh)",
      "0208" = "Bilaspur (Himachal Pradesh)",
      "0701" = "North West Dheli",
      "0702" = "North Delhi",
      "0703" = "North East Dheli",
      "0704" = "East Dheli",
      "0705" = "New Dheli",
      "0706" = "Central Dheli",
      "0707" = "West Dheli",
      "0708" = "South West Dheli",
      "0709" = "South Dheli",
      "1101" = "North Sikkim",
      "1102" = "West Sikkim",
      "1103" = "South Sikkim",
      "1104" = "East Sikkim",
      "0943" = "Pratapgarh (Uttar Pradesh)",
      "2719" = "Aurangabad (Maharashtra)",
      "1034" = "Aurangabad (Bihar)",
      "2204" = "Raigarh (Chhattisgarh)",
      "2724" = "Raigarh (Maharashtra)",
      "2903" = "Bijapur (Karnataka)",
      "3311" = "The Nilgiris",
      "0913" = "Hathras"
    )
    id_chr <- as.character(d_id)
    ifelse(
      is.na(id_chr),
      NA,
      ifelse(id_chr %in% names(name_by_id),
             unname(name_by_id[id_chr]),
             ex_d_name_ind)
    )
  }) %>%
  mutate(ex_d_name_ind = as.factor(ex_d_name_ind))

# check
summary(India_DLHS_AHS$ex_d_name_ind)
unique(India_DLHS_AHS$ex_d_name_ind)
unique(India_DLHS_AHS$state_dist)
# author's note: no missings, 561 districts

```



# Create variables for analysis

```{r Define CVD risk factor/ outcome variables}




# 1. diabetes ----

# Main definition:
#   fasted == 1              -> diabetes if glucose >= 126
#   fasted == 0              -> diabetes if glucose >= 200
#   AHS rows (DLHS == 0) with no fasting information -> threshold 126
#   glucose missing, or fasting status missing outside the AHS -> NA
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    ex_diab_narrow_ind = case_when(
      is.na(glucose) ~ NA_real_,
      DLHS == 0 & is.na(fasted) ~ as.numeric(glucose >= 126),
      is.na(fasted) ~ NA_real_,
      (fasted == 1 & glucose >= 126) | (fasted == 0 & glucose >= 200) ~ 1,
      TRUE ~ 0
    ),
    ex_diab_narrow_ind = as.factor(ex_diab_narrow_ind), # factor variable
    ex_diab_narrow_ind_dbl = as.numeric(as.character(ex_diab_narrow_ind)) # dbl variable
  )

# check (counts below are the author's recorded values)
summary(India_DLHS_AHS$ex_diab_narrow_ind)
# 265019 missings
summary(as.factor(India_DLHS_AHS$fasted))
# 769639 missings (includes 607227 AHS participants)
summary(India_DLHS_AHS$glucose)
# 264966 missings

# Alternative definition: identical, except that AHS rows without fasting
# information are treated as not fasted (threshold 200 instead of 126).
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    ex_diab_narrow_ind_ahs_unfasted = case_when(
      is.na(glucose) ~ NA_real_,
      DLHS == 0 & is.na(fasted) ~ as.numeric(glucose >= 200),
      is.na(fasted) ~ NA_real_,
      (fasted == 1 & glucose >= 126) | (fasted == 0 & glucose >= 200) ~ 1,
      TRUE ~ 0
    ),
    ex_diab_narrow_ind_ahs_unfasted = as.factor(ex_diab_narrow_ind_ahs_unfasted), # factor variable
    ex_diab_narrow_ind_ahs_unfasted_dbl =
      as.numeric(as.character(ex_diab_narrow_ind_ahs_unfasted)) # dbl variable
  )

# check
summary(India_DLHS_AHS$ex_diab_narrow_ind_ahs_unfasted)
# 265019 missings


# 2. hypertension ----

# Hypertension = average systolic >= 140 or average diastolic >= 90;
# NA when either average is missing.
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(ex_htn_narrow_ind = case_when(
    is.na(bpsyst_avg) | is.na(bpdiast_avg) ~ NA_real_,
    bpsyst_avg >= 140 | bpdiast_avg >= 90 ~ 1,
    TRUE ~ 0
  ))

# check
summary(as.factor(India_DLHS_AHS$ex_htn_narrow_ind))
# 212769 missings (author's recorded value)

# Readings where diastolic exceeds systolic are also set to missing.
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(ex_htn_narrow_ind = replace(
    ex_htn_narrow_ind,
    which(bpdiast_avg > bpsyst_avg),
    NA
  ))

# check
summary(as.factor(India_DLHS_AHS$ex_htn_narrow_ind))
# 213546 missings (author's recorded value)

India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    ex_htn_narrow_ind = as.factor(ex_htn_narrow_ind), # factor variable
    ex_htn_narrow_ind_dbl = as.numeric(as.character(ex_htn_narrow_ind)) # dbl variable
  )



# 3. bmi ----

# BMI bands; the conditions are evaluated top to bottom, so each band only
# needs its upper bound. Missing BMI stays NA. "<18.5" is the reference level.
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    bmi_grp = case_when(
      BMI < 18.5 ~ "<18.5",
      BMI < 23 ~ "18.5-<23",
      BMI < 25 ~ "23-<25",
      BMI < 30 ~ "25-<30",
      BMI >= 30 ~ ">=30"
    ),
    bmi_grp = relevel(as.factor(bmi_grp), ref = "<18.5")
  )

# Obesity indicator: 1 if BMI >= 27.5, 0 otherwise, NA if BMI is missing.
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    bmigrt27.5 = relevel(as.factor(as.numeric(BMI >= 27.5)), ref = "0"), # factor variable
    bmigrt27.5_dbl = as.numeric(as.character(bmigrt27.5)) # dbl variable
  )

# check
summary(India_DLHS_AHS$bmigrt27.5)
# 215919 missings (author's recorded value)

# 4. currently smoking ----

# `smoke` holds numeric codes in some rows and text labels in others.
# Codes 1/2 and the labels "occational-smoker"/"usual-smoker" count as current
# smokers; codes 3/4 and "ex-smoker"/"never-smoked" as non-smokers; anything
# else (including NA) is missing. The label spellings must match the data
# exactly, so they are left as they are.
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    csmoke = case_when(
      smoke %in% c(1, 2, "occational-smoker", "usual-smoker") ~ 1,
      smoke %in% c(3, 4, "ex-smoker", "never-smoked") ~ 0,
      TRUE ~ NA_real_
    ),
    csmoke = as.factor(csmoke), # factor variable
    csmoke_dbl = as.numeric(as.character(csmoke)) # dbl variable
  )

# check
summary(India_DLHS_AHS$csmoke)
# 368248 missings (author's recorded value)


```

```{r create Individual-level dependent (level 1) variables}

# 1. 5 year age groups ----

# age_5yr: 1 for age < 20, then one step per 5-year band from 20 upwards, and
# 11 for age >= 65. Missing age stays NA.
# age_grp: the matching label, as a factor whose first (reference) level is
# "15-19".
# NOTE(review): the last label reads ">65" although the band starts at 65; the
# label is kept as is because it appears in the output tables.
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    age_5yr = findInterval(age, seq(20, 65, by = 5)) + 1,
    age_grp = {
      band_labels <- c(
        "15-19", "20-24", "25-29", "30-34", "35-39", "40-44",
        "45-49", "50-54", "55-59", "60-64", ">65"
      )
      factor(band_labels[age_5yr], levels = band_labels)
    }
  )

# check
summary(India_DLHS_AHS$age_grp)
# 39 missings (author's recorded value)

# 2. urban/rural ----

# urban is the complement of rural (rural == 1 -> 0, otherwise 1; NA stays NA).
# Each indicator is kept as a factor, as a double (_dbl), and as a text label
# for the tables (urban_lab).
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    urban = as.factor(as.numeric(rural != 1)), # factor variable
    urban_dbl = as.numeric(as.character(urban)), # dbl variable
    rural_dbl = as.numeric(as.character(rural)), # dbl variable
    urban_lab = as.factor(ifelse(urban == 1, "urban", "rural")) # label for tables
  )

# check
summary(India_DLHS_AHS$urban)
# author's note: no missings

# 3. sex ----

# sex: 1 = female, 0 = male (taken from `female`); NA stays NA.
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    sex = as.factor(as.numeric(female == 1)), # factor variable
    sex_dbl = as.numeric(as.character(sex)), # dbl variable
    sex_lab = as.factor(ifelse(sex == 0, "male", "female")) # label for tables
  )

# check
summary(India_DLHS_AHS$sex)
# 562 missings (author's recorded value)

# 4. education ----

# educat_lcl_original: the survey's own education answer, taken from hv12 for
# DLHS rows (DLHS == 1) and from highest_qualification for AHS rows (DLHS == 0).
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    educat_lcl_original = ifelse(
      is.na(hv12) & is.na(highest_qualification),
      NA,
      ifelse(DLHS == 1,
             hv12,
             ifelse(DLHS == 0, highest_qualification, NA))
    )
  )

# DLHS answers coded 99 are recoded to 0 ("illiterate"). Per the original note
# this mirrors the Stata cleaning files ("1_DLHS4_final_20170522.do", where the
# 99s fall in the lowest `educ` category, and "1_DLHS_codebook2.do", where
# hv12 == 99 is replaced by 0 in `educat_lcl`).
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    educat_lcl_original = replace(
      educat_lcl_original,
      which(DLHS == 1 & educat_lcl_original == 99),
      0
    )
  )

# educat_lcl: six harmonised categories
#   1 = no formal education            (original 0, 1)
#   2 = below primary                  (original 2)
#   3 = completed primary              (original 3)
#   4 = some secondary / middle school (original 4, 5)
#   5 = completed secondary            (original 6)
#   6 = higher                         (remaining values up to 12)
# Original values above 12 are set to missing.
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    educat_lcl = case_when(
      is.na(educat_lcl_original) | educat_lcl_original > 12 ~ NA_real_,
      educat_lcl_original %in% c(0, 1) ~ 1,
      educat_lcl_original == 2 ~ 2,
      educat_lcl_original == 3 ~ 3,
      educat_lcl_original %in% c(4, 5) ~ 4,
      educat_lcl_original == 6 ~ 5,
      TRUE ~ 6
    )
  )

# educat_lab: text label of educat_lcl for the tables. The strings are kept
# exactly as they were, including the leading blank in " <Secondary".
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    educat_lab = c(
      "No formal education", "< Primary", "Primary",
      " <Secondary", "Secondary", ">Secondary"
    )[educat_lcl]
  )
                             


# Education variables for analysis ----

# ed_5: five categories (educat_lcl 1 and 2 merged)
#   1 = no formal education or below primary
#   3 = completed primary
#   4 = some secondary / middle school
#   5 = completed secondary
#   6 = higher
# Stored as a factor with "1" as the reference level.
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    ed_5 = case_when(
      educat_lcl <= 2 ~ 1,
      educat_lcl <= 6 ~ educat_lcl
    ),
    ed_5 = relevel(as.factor(as.character(ed_5)), ref = "1") # factor variable
  )

# check
summary(as.factor(India_DLHS_AHS$educat_lcl_original))
# 6370 NAs
summary(as.factor(India_DLHS_AHS$educat_lcl))
# The author's note here read "7131 NAs (-943 people with "96" value->missing)".
# 6370 + 943 = 7313, which is also the count noted for ed_4 below, so 7131
# looks like a transposition. TODO confirm against the output.
summary(India_DLHS_AHS$ed_5)
# same number of NAs as educat_lcl

# ed_4: four categories (additionally merges educat_lcl 5 and 6 into 5).
# Stored as a factor with "1" as the reference level.
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    ed_4 = case_when(
      educat_lcl <= 2 ~ 1,
      educat_lcl <= 4 ~ educat_lcl,
      educat_lcl <= 6 ~ 5
    ),
    ed_4 = relevel(as.factor(as.character(ed_4)), ref = "1") # factor variable
  )

# check
summary(as.factor(India_DLHS_AHS$educat_lcl))
summary(India_DLHS_AHS$ed_4)
# 7313 missing (author's recorded value)

# 5. district household wealth quintile ----

India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    asset_index_combined = ai_NAT_rurb, # asset index from the PCA analysis
    wealth_quintile_rurb = q_ai_nat_rurb, # national quintile, separately for rural and urban areas
    urban_all = ifelse(is.na(urban), NA, 1) # 1 for everybody with an urban/rural observation
  )

# Share of urban and of rural respondents within each district (in percent).
India_DLHS_AHS <- India_DLHS_AHS %>%
  group_by(ex_d_name_ind) %>%
  mutate(
    count_nom_u = sum(urban_dbl, na.rm = TRUE),
    count_denom_u = sum(urban_all, na.rm = TRUE),
    count_nom_r = sum(rural_dbl, na.rm = TRUE),
    count_denom_r = sum(urban_all, na.rm = TRUE),
    urban_prop = (count_nom_u / count_denom_u) * 100,
    rural_prop = (count_nom_r / count_denom_r) * 100
  ) %>%
  ungroup()

# District wealth quintiles:
#  * districts where both the urban and the rural share are >= 5%: quintiles
#    are formed separately within the urban and the rural part of the district
#    (author's note: true for 519/561 districts);
#  * districts where either share is < 5%: quintiles are formed over the whole
#    district.
# Rows without an asset index stay NA. The two helper columns are dropped again.
India_DLHS_AHS <- India_DLHS_AHS %>%
  group_by(ex_d_name_ind, urban) %>%
  mutate(quintile_in_stratum = ntile(asset_index_combined, 5)) %>%
  group_by(ex_d_name_ind) %>%
  mutate(quintile_in_district = ntile(asset_index_combined, 5)) %>%
  ungroup() %>%
  mutate(
    hh_wealth_quintile_district = case_when(
      is.na(asset_index_combined) ~ NA_integer_,
      urban_prop >= 5 & rural_prop >= 5 ~ quintile_in_stratum,
      urban_prop < 5 | rural_prop < 5 ~ quintile_in_district
    )
  ) %>%
  dplyr::select(-quintile_in_stratum, -quintile_in_district)

India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    hh_wealth_quintile_district =
      relevel(as.factor(hh_wealth_quintile_district), ref = "1"), # factor variable
    hh_wealth_quintile_district_c =
      as.numeric(as.character(hh_wealth_quintile_district)) # dbl variable
  )

# check
# NOTE(review): these three summaries repeat the education checks from the
# previous section and do not look at the wealth variables.
summary(as.factor(India_DLHS_AHS$educat_lcl_original))
# 6370 NAs
summary(as.factor(India_DLHS_AHS$educat_lcl))
# author's note: 7131 NAs (-943 people with "96" value->missing);
# 6370 + 943 = 7313, so 7131 is probably a typo. TODO confirm.
summary(India_DLHS_AHS$ed_5)
# same number of NAs as educat_lcl



# Collapse the district wealth quintiles into three groups.
# Codes as written to the data:
#   0 = poor        (quintiles 1 and 2)
#   1 = medium rich (quintile 3)
#   2 = rich        (quintiles 4 and 5)
# Stored as a factor with "0" as the reference level.
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    hh_wealth_quintile_groups_district = case_when(
      hh_wealth_quintile_district_c %in% c(1, 2) ~ 0,
      hh_wealth_quintile_district_c == 3 ~ 1,
      hh_wealth_quintile_district_c %in% c(4, 5) ~ 2
    ),
    hh_wealth_quintile_groups_district = relevel(
      as.factor(as.character(hh_wealth_quintile_groups_district)),
      ref = "0"
    ) # factor variable
  )



# 6. national household wealth quintile ----

India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    wealth_quintile_rurb = as.factor(wealth_quintile_rurb), # factor variable
    wealth_quintile_rurb_c = as.numeric(as.character(wealth_quintile_rurb)) # dbl variable
  )

# Collapse the national quintiles into three groups.
# Codes as written to the data:
#   0 = poor        (quintiles 1 and 2)
#   1 = medium rich (quintile 3)
#   2 = rich        (quintiles 4 and 5)
# Stored as a factor with "0" as the reference level.
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    wealth_quintile_rurb_groups = case_when(
      wealth_quintile_rurb_c %in% c(1, 2) ~ 0,
      wealth_quintile_rurb_c == 3 ~ 1,
      wealth_quintile_rurb_c %in% c(4, 5) ~ 2
    ),
    wealth_quintile_rurb_groups = relevel(
      as.factor(as.character(wealth_quintile_rurb_groups)),
      ref = "0"
    ) # factor variable
  )





# 7. scaled asset index ----

# z-score of the asset index over the whole sample (missing values ignored
# when computing the mean and the standard deviation).
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(asset_index_combined_s = {
    overall_mean <- mean(asset_index_combined, na.rm = TRUE)
    overall_sd <- sd(asset_index_combined, na.rm = TRUE)
    (asset_index_combined - overall_mean) / overall_sd
  })

# check: the author recorded 70079 NAs for each of the following
summary(India_DLHS_AHS$asset_index_combined)
summary(India_DLHS_AHS$wealth_quintile_rurb)
summary(India_DLHS_AHS$hh_wealth_quintile_district)
summary(India_DLHS_AHS$hh_wealth_quintile_groups_district)
summary(India_DLHS_AHS$asset_index_combined_s)
summary(India_DLHS_AHS$wealth_quintile_rurb_groups)

```

# Filter
```{r filter sample size}

# Keep only districts with at least 100 respondents; `n` is the number of
# rows per district and stays in the data as a column.
India_DLHS_AHS <- India_DLHS_AHS %>%
  add_count(ex_d_name_ind, name = "n") %>%
  dplyr::filter(n >= 100)

# How many districts are still included? One row per district:
test <- India_DLHS_AHS %>%
  distinct(ex_d_name_ind, .keep_all = TRUE) %>%
  dplyr::filter(n >= 100)
# author's note: 559/561

```

```{r District-level indicators (level 2 variables)}

# 1. Load GDP per capita (PCI, from the planning commission) and merge ----
PCI_districts <- read_csv("PCI_districts_DLHS_4_AHS_08_19_clean.csv")

# Every survey row gets the PCI value of its district (matched by name).
India_DLHS_AHS <- left_join(India_DLHS_AHS, PCI_districts, by = "ex_d_name_ind")

India_DLHS_AHS$PCI_districts <- as.numeric(India_DLHS_AHS$PCI_districts)

# Two separate match checks:
#  * `test`: survey rows whose district name has no row in the PCI list
#    (this is what the original anti_join computed; the author noted 0 obs).
#  * `unmatched_pci`: PCI-list rows whose district name does not occur in the
#    survey data. The original comment described this check, but the original
#    anti_join had its arguments the other way round, so a misspelled name in
#    the PCI file went unnoticed. Inspect `unmatched_pci`; rows here point to
#    name mismatches (or to districts dropped by the sample-size filter).
test <- anti_join(India_DLHS_AHS, PCI_districts, by = "ex_d_name_ind")
unmatched_pci <- anti_join(PCI_districts, India_DLHS_AHS, by = "ex_d_name_ind")

# Which districts have a non-missing PCI value? One row per district:
test <- India_DLHS_AHS %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  ungroup()
test <- dplyr::filter(test, !is.na(PCI_districts))
# author's note: 467/559 districts (467/561 before the sample-size filter)

summary(India_DLHS_AHS$PCI_districts)
# 277158 NAs (author's recorded value)

# 2. educational attainment and literacy ----

# Person-level helper indicators:
#   literacy : 1 if educat_lcl_original > 0, 0 if it is 0, NA if missing or > 12
#   edc_l    : 1 for literate women (literacy == 1 and sex == 1), 0 otherwise,
#              NA if literacy or sex is missing
#   edc_l0   : 1 for women with a literacy observation (denominator of the
#              female literacy rate)
#   edc_2    : 1 if at least primary education completed (educat_lcl >= 3)
#   edc_0    : 1 for everybody with an educat_lcl observation (denominator)
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(
    literacy = case_when(
      is.na(educat_lcl_original) | educat_lcl_original > 12 ~ NA_real_,
      educat_lcl_original > 0 ~ 1,
      TRUE ~ 0
    ),
    edc_l = case_when(
      is.na(literacy) | is.na(sex) ~ NA_real_,
      literacy == 1 & sex == 1 ~ 1,
      TRUE ~ 0
    ),
    edc_l0 = ifelse(!is.na(literacy) & sex == 1, 1, 0),
    edc_2 = as.numeric(educat_lcl >= 3),
    edc_0 = ifelse(is.na(educat_lcl), NA, 1)
  )

# checks (counts are the author's recorded values)
summary(as.factor(India_DLHS_AHS$educat_lcl))
summary(as.factor(India_DLHS_AHS$educat_lcl_original))
summary(as.factor(India_DLHS_AHS$literacy))
# 7313 missings
summary(as.factor(India_DLHS_AHS$edc_l))
# 7873 missings (7313 literacy + 560 missing sex in the filtered data)
summary(as.factor(India_DLHS_AHS$edc_l0))
# 768906 1's = 771995 with sex == 1 minus 3089 women with NA in literacy
summary(as.factor(India_DLHS_AHS$educat_lcl))
summary(as.factor(India_DLHS_AHS$edc_2))
# 1015392 1's (equals educat_lcl 3 + 4 + 5 + 6)
summary(as.factor(India_DLHS_AHS$educat_lcl))
summary(as.factor(India_DLHS_AHS$edc_0))
# only 7313 obs not 1 (equals educat_lcl missing)

# District-level (unweighted) percentages:
#   ed_att_new           : share with at least primary education
#   literacy_rate_female : share of literate women among women with a
#                          literacy observation
India_DLHS_AHS <- India_DLHS_AHS %>%
  group_by(ex_d_name_ind) %>%
  mutate(
    count_nom = sum(edc_2, na.rm = TRUE),
    count_denom = sum(edc_0, na.rm = TRUE),
    count_nom_l = sum(edc_l, na.rm = TRUE),
    count_denom_l = sum(edc_l0, na.rm = TRUE),
    ed_att_new = (count_nom / count_denom) * 100,
    literacy_rate_female = (count_nom_l / count_denom_l) * 100
  ) %>%
  ungroup()

# check
summary(India_DLHS_AHS$literacy_rate_female)
# author's note: no missings
summary(India_DLHS_AHS$ed_att_new)
# author's note: no missings




# 3. urban/rural proportion ----

# 1 for everybody who has an urban/rural observation (denominator)
India_DLHS_AHS <- India_DLHS_AHS %>%
  mutate(urban_all = ifelse(is.na(urban), NA, 1))

# check
summary(as.factor(India_DLHS_AHS$urban_all))
# author's note: no missings

# Share of urban and of rural respondents per district, in percent.
India_DLHS_AHS <- India_DLHS_AHS %>%
  group_by(ex_d_name_ind) %>%
  mutate(
    count_nom_u = sum(urban_dbl, na.rm = TRUE),
    count_denom_u = sum(urban_all, na.rm = TRUE),
    count_nom_r = sum(rural_dbl, na.rm = TRUE),
    count_denom_r = sum(urban_all, na.rm = TRUE),
    urban_prop = (count_nom_u / count_denom_u) * 100,
    rural_prop = (count_nom_r / count_denom_r) * 100
  ) %>%
  ungroup()

# check
summary(India_DLHS_AHS$urban_prop)
# author's note: no missings




# 4. median wealth ----

# medianai_r_u: median scaled asset index per district, separately for the
#               urban and the rural part (for the district-level regressions)
# medianai / mean: median and mean scaled asset index per whole district
#               (for the multilevel regressions)
India_DLHS_AHS <- India_DLHS_AHS %>%
  group_by(ex_d_name_ind, urban_lab) %>%
  mutate(medianai_r_u = median(asset_index_combined_s, na.rm = TRUE)) %>%
  group_by(ex_d_name_ind) %>%
  mutate(
    medianai = median(asset_index_combined_s, na.rm = TRUE),
    mean = mean(asset_index_combined_s, na.rm = TRUE)
  ) %>%
  ungroup()

# check
summary(India_DLHS_AHS$medianai)
summary(India_DLHS_AHS$medianai_r_u)
# author's note: no missings


```



# Sample Characteristics

```{r TABLE 1}

# Table 1: summary characteristics of the sample (overall, by sex, by survey).

# CreateTableOne summarises factors as categorical variables, so make sure
# these two are factors.
India_DLHS_AHS$literacy <- as.factor(India_DLHS_AHS$literacy)
India_DLHS_AHS$fasted <- as.factor(India_DLHS_AHS$fasted)

# variables that are included
table1names <- c(
  "ex_diab_narrow_ind", "ex_htn_narrow_ind", "bmigrt27.5", "bmi_grp",
  "csmoke", "age_grp", "sex_lab", "urban_lab",
  "hh_wealth_quintile_district", "educat_lab", "literacy", "age",
  "edc_2", "ed_5", "wealth_quintile_rurb", "fasted"
)

# create tables (missing values are not shown as a separate category)
totalmiss <- CreateTableOne(
  vars = table1names, data = India_DLHS_AHS, includeNA = FALSE
)
print(totalmiss)

sexmiss <- CreateTableOne(
  vars = table1names, data = India_DLHS_AHS, strata = "sex_lab",
  includeNA = FALSE
)
print(sexmiss)

# stratified by the DLHS indicator; used to read off the percentage who
# have fasted in DLHS-4
fast <- CreateTableOne(
  vars = table1names, data = India_DLHS_AHS, strata = "DLHS",
  includeNA = FALSE
)
print(fast)


# Write the tables to csv so they can be copied into Word.
# printToggle = FALSE returns the formatted matrix without printing it.
# The former `exact = "stage"` argument was dropped: "stage" is not one of
# the table variables, so it never selected a variable for exact tests.
DLHS_AHS_total <- print(
  totalmiss, quote = FALSE, noSpaces = TRUE, printToggle = FALSE
)
DLHS_AHS_sexmiss <- print(
  sexmiss, quote = FALSE, noSpaces = TRUE, printToggle = FALSE
)
write.csv(DLHS_AHS_total, file = "DLHS_AHS_total.csv")
write.csv(DLHS_AHS_sexmiss, file = "DLHS_AHS_sexmiss.csv")


```







# Discussion analyses

```{r CVD risk factors onto household wealth whole dataset + subset}

# Logistic regression of CVD risk factors onto household wealth with
# district-level fixed effects: data preparation.

# Use the same reference district ("Araria") in every model.
India_DLHS_AHS$ex_d_name_ind <- as.factor(India_DLHS_AHS$ex_d_name_ind)
dlevel_fixed_effects_analysis <- India_DLHS_AHS
dlevel_fixed_effects_analysis$ex_d_name_ind <- relevel(
  dlevel_fixed_effects_analysis$ex_d_name_ind,
  ref = "Araria"
)

# Subset: respondents living in districts with ed_att_new below 48.94.
# The cut-off was obtained from the one-row-per-district table below
# (see the commented-out quantile call).
districts <- India_DLHS_AHS %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  ungroup
#quantile(districts$ed_att_new, .2,na.rm=TRUE)
#48.94
#subset<-dplyr::filter(districts, ed_att_new<48.94)

subset <- dplyr::filter(India_DLHS_AHS, ed_att_new < 48.94)
subset$ex_d_name_ind <- relevel(subset$ex_d_name_ind, ref = "Araria")


##### diabetes
# Diabetes outcome regressed on household wealth quintile plus district
# fixed effects; coefficients are written to csv after every fit.

## glm (logistic)
# whole dataset
mod_1 <- speedglm(
  ex_diab_narrow_ind_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  family = binomial(link = "logit"),
  data = dlevel_fixed_effects_analysis
)
coeffs <- tidy(mod_1, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DLHS_mod_1.csv")

# subset
mod_1b <- speedglm(
  ex_diab_narrow_ind_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  family = binomial(link = "logit"),
  data = subset
)
coeffs <- tidy(mod_1b, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DLHS_mod_1b.csv")


## lm (linear); note that mod_1 / mod_1b are overwritten here
# whole dataset
mod_1 <- speedlm(
  ex_diab_narrow_ind_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  data = dlevel_fixed_effects_analysis
)
coeffs <- tidy(mod_1, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DLHS_mod_1_lm.csv")

# subset
mod_1b <- speedlm(
  ex_diab_narrow_ind_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  data = subset
)
coeffs <- tidy(mod_1b, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DLHS_mod_1b_lm.csv")






##### hypertension
# Hypertension outcome regressed on household wealth quintile plus district
# fixed effects; coefficients are written to csv after every fit.

## glm (logistic)
# whole dataset
mod_2 <- speedglm(
  ex_htn_narrow_ind_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  family = binomial(link = "logit"),
  data = dlevel_fixed_effects_analysis
)
coeffs <- tidy(mod_2, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DLHS_mod_2.csv")

# subset
mod_2b <- speedglm(
  ex_htn_narrow_ind_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  family = binomial(link = "logit"),
  data = subset
)
coeffs <- tidy(mod_2b, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DLHS_mod_2b.csv")


## lm (linear); note that mod_2 / mod_2b are overwritten here
# whole dataset
mod_2 <- speedlm(
  ex_htn_narrow_ind_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  data = dlevel_fixed_effects_analysis
)
coeffs <- tidy(mod_2, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DLHS_mod_2_lm.csv")

# subset
mod_2b <- speedlm(
  ex_htn_narrow_ind_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  data = subset
)
coeffs <- tidy(mod_2b, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DLHS_mod_2b_lm.csv")




##### bmi
# BMI > 27.5 indicator regressed on household wealth quintile plus district
# fixed effects; coefficients are written to csv after every fit.

## glm (logistic)
# whole dataset
mod_3 <- speedglm(
  bmigrt27.5_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  family = binomial(link = "logit"),
  data = dlevel_fixed_effects_analysis
)
coeffs <- tidy(mod_3, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DLHS_mod_3.csv")

# subset
mod_3b <- speedglm(
  bmigrt27.5_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  family = binomial(link = "logit"),
  data = subset
)
coeffs <- tidy(mod_3b, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DLHS_mod_3b.csv")

## lm (linear); note that mod_3 / mod_3b are overwritten here
# whole dataset
mod_3 <- speedlm(
  bmigrt27.5_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  data = dlevel_fixed_effects_analysis
)
coeffs <- tidy(mod_3, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DLHS_mod_3_lm.csv")

# subset
mod_3b <- speedlm(
  bmigrt27.5_dbl ~ hh_wealth_quintile_district + ex_d_name_ind,
  data = subset
)
coeffs <- tidy(mod_3b, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DLHS_mod_3b_lm.csv")





```

```{r Household wealth on educational attainment}
# Ordinary least squares regression of household wealth
# (hh_wealth_quintile_district_c) on educational attainment (ed_5, entered
# as a categorical predictor).
mod <- lm(
  hh_wealth_quintile_district_c ~ as.factor(ed_5),
  data = India_DLHS_AHS
)
summary(mod)

# export the coefficient table with confidence intervals
coeffs <- tidy(mod, conf.int = TRUE, method = "Wald")
write.csv(coeffs, "DLHS_mod_wealth_ed_lm.csv")


```


```{r Association of a district’s primary school completion rate with the difference in the continuous household wealth index between highest and lowest household wealth quintile}

#######Association of a district’s primary school completion rate with the difference in the continuous household wealth index between highest and lowest household wealth quintile

#### district wise 


# District-wise analysis: per district, the difference in mean asset index
# between district wealth quintile 5 and quintile 1 is regressed on ed_att_new.
Ana_d <- India_DLHS_AHS

# Drop districts with <20 observations in wealth quintile 1 or 5.

# 0/1 indicators for membership in quintile 1 and quintile 5 (NA if unknown)
Ana_d <- mutate(
  Ana_d,
  wq_1_o = ifelse(is.na(hh_wealth_quintile_district), NA,
                  ifelse(hh_wealth_quintile_district == 1, 1, 0)),
  wq_5_o = ifelse(is.na(hh_wealth_quintile_district), NA,
                  ifelse(hh_wealth_quintile_district == 5, 1, 0))
)

# number of quintile-1 and quintile-5 observations per district
Ana_d <- Ana_d %>%
  group_by(ex_d_name_ind) %>%
  mutate(
    count_wq_1_o = sum(wq_1_o, na.rm = TRUE),
    count_wq_5_o = sum(wq_5_o, na.rm = TRUE)
  )

# keep districts with >=20 cases in both quintiles
Ana_d <- Ana_d %>%
  dplyr::filter(count_wq_1_o >= 20 & count_wq_5_o >= 20) %>%
  ungroup()

# check numbers: number of districts that remain
Ana_d %>% dplyr::summarise(n_districts = n_distinct(ex_d_name_ind))
#559


# calculate mean asset index per district in wealth quintile 1 and 5 and the
# difference between the two
hwq1_df <- Ana_d %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(hh_wealth_quintile_district == 1) %>%
  summarise(mean_hwq_1_districts = mean(asset_index_combined_s, na.rm = TRUE))
hwq5_df <- Ana_d %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(hh_wealth_quintile_district == 5) %>%
  summarise(mean_hwq5_districts = mean(asset_index_combined_s, na.rm = TRUE))

analysis_wq_district <- left_join(Ana_d, hwq1_df, by = "ex_d_name_ind")
analysis_wq_district <- left_join(analysis_wq_district, hwq5_df, by = "ex_d_name_ind")
analysis_wq_district <- mutate(
  analysis_wq_district,
  diff_hwq5_hwq1 = mean_hwq5_districts - mean_hwq_1_districts
)


# one observation/district (all kept columns are constant within a district)
analysis_wq_district <- analysis_wq_district %>%
  dplyr::select(ex_d_name_ind, ed_att_new, diff_hwq5_hwq1) %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  ungroup

# lm analysis
mod <- lm(diff_hwq5_hwq1 ~ ed_att_new, data = analysis_wq_district)
summary(mod)


# graph: one point per district plus the fitted regression line
fig <- analysis_wq_district %>%
  ggplot() +
  geom_jitter(mapping = aes(y = diff_hwq5_hwq1, x = ed_att_new), size = 0.3) +
  geom_smooth(
    mapping = aes(y = diff_hwq5_hwq1, x = ed_att_new),
    method = "lm", color = "gray48", se = FALSE, size = 0.7
  ) +
  theme_classic() +
  labs(
    x = "% of participants in a district who completed primary education",
    y = "Difference of mean wealth indices",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    #legend.text=element_text(size=10),
    #legend.title = element_text(size=8,face="bold"),
    #legend.key.size = unit(0.45,"cm"),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(breaks = c(1, 2, 3, 4), limits = c(0.5, 4.4)) +
  scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(15, 110)) +
  coord_fixed(ratio = 20 / 1, expand = FALSE)


print(fig)




#### district-wise stratified by urban residence

# District-wise analysis stratified by urban residence: same wealth-gap
# computation as above, but per district x urban/rural cell.
Ana_d_ru <- India_DLHS_AHS

# Drop district/residence cells with <20 observations in wealth quintile 1 or 5
# (stratified by rural/urban residence).

# 0/1 indicators for membership in quintile 1 and quintile 5 (NA if unknown)
Ana_d_ru <- mutate(
  Ana_d_ru,
  wq_1_o = ifelse(is.na(hh_wealth_quintile_district), NA,
                  ifelse(hh_wealth_quintile_district == 1, 1, 0)),
  wq_5_o = ifelse(is.na(hh_wealth_quintile_district), NA,
                  ifelse(hh_wealth_quintile_district == 5, 1, 0))
)

# number of quintile-1 and quintile-5 observations per district and residence
Ana_d_ru <- Ana_d_ru %>%
  group_by(ex_d_name_ind, urban) %>%
  mutate(
    count_wq_1_o = sum(wq_1_o, na.rm = TRUE),
    count_wq_5_o = sum(wq_5_o, na.rm = TRUE)
  )

# keep cells with >=20 cases in both quintiles
Ana_d_ru <- Ana_d_ru %>%
  dplyr::filter(count_wq_1_o >= 20 & count_wq_5_o >= 20) %>%
  ungroup()

# check numbers: districts remaining in each stratum
# urban
Ana_d_ru %>%
  dplyr::filter(urban == 1) %>%
  dplyr::summarise(n_districts = n_distinct(ex_d_name_ind))
#516

# rural
Ana_d_ru %>%
  dplyr::filter(urban == 0) %>%
  dplyr::summarise(n_districts = n_distinct(ex_d_name_ind))
#550

# calculate mean asset index per cell in wealth quintile 1 and 5 and the
# difference between the two
hwq1_df <- Ana_d_ru %>%
  group_by(ex_d_name_ind, urban) %>%
  dplyr::filter(hh_wealth_quintile_district == 1) %>%
  summarise(mean_hwq_1_districts = mean(asset_index_combined_s, na.rm = TRUE))
hwq5_df <- Ana_d_ru %>%
  group_by(ex_d_name_ind, urban) %>%
  dplyr::filter(hh_wealth_quintile_district == 5) %>%
  summarise(mean_hwq5_districts = mean(asset_index_combined_s, na.rm = TRUE))

analysis_wq_district_r_u <- left_join(Ana_d_ru, hwq1_df, by = c("ex_d_name_ind", "urban"))
analysis_wq_district_r_u <- left_join(analysis_wq_district_r_u, hwq5_df, by = c("ex_d_name_ind", "urban"))
analysis_wq_district_r_u <- mutate(
  analysis_wq_district_r_u,
  diff_hwq5_hwq1 = mean_hwq5_districts - mean_hwq_1_districts
)


# one observation per district and residence
analysis_wq_district_r_u <- analysis_wq_district_r_u %>%
  dplyr::select(ex_d_name_ind, ed_att_new, diff_hwq5_hwq1, urban, urban_lab) %>%
  group_by(ex_d_name_ind, urban) %>%
  dplyr::filter(row_number() == 1) %>%
  ungroup


# lm analysis, separately for urban and rural cells
# (inside filter() `urban` refers to the data column, not to the object
# of the same name created here)
urban <- dplyr::filter(analysis_wq_district_r_u, urban == 1)
mod <- lm(diff_hwq5_hwq1 ~ ed_att_new, data = urban)
summary(mod)

rural <- dplyr::filter(analysis_wq_district_r_u, urban == 0)
mod <- lm(diff_hwq5_hwq1 ~ ed_att_new, data = rural)
summary(mod)

# graph: one panel per urban_lab value
# NOTE(review): the labeller only relabels panels whose urban_lab value is
# exactly "urban" or "rural" -- confirm against the levels of urban_lab.
labels <- c(urban = "Urban", rural = "Rural")

fig <- analysis_wq_district_r_u %>%
  ggplot() +
  geom_jitter(mapping = aes(y = diff_hwq5_hwq1, x = ed_att_new), size = 0.3) +
  geom_smooth(
    mapping = aes(y = diff_hwq5_hwq1, x = ed_att_new),
    method = "lm", color = "gray48", se = FALSE, size = 0.7
  ) +
  theme_classic() +
  labs(
    x = "% of participants in a district who completed primary education",
    y = "Difference of mean wealth indices",
    fill = ""
  ) +
  facet_wrap(~urban_lab, labeller = labeller(urban_lab = labels)) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    #legend.text=element_text(size=10),
    #legend.title = element_text(size=8,face="bold"),
    #legend.key.size = unit(0.45,"cm"),
    panel.spacing = unit(2, "lines"),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.text.x = element_text(size = 20, face = "bold"),
    strip.text.y = element_text(size = 20, face = "bold"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(breaks = c(1, 2, 3, 4), limits = c(0.5, 4.4)) +
  scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(15, 110)) +
  coord_fixed(ratio = 32 / 1, expand = FALSE)


print(fig)





#### national

# National analysis: same wealth-gap computation per district, but using the
# wealth_quintile_rurb quintiles instead of the within-district quintiles.

# drop districts with <20 observations in wealth quintile 1 or 5
Ana_n <- India_DLHS_AHS

# 0/1 indicators for membership in quintile 1 and quintile 5 (NA if unknown)
Ana_n <- mutate(
  Ana_n,
  wq_1_o = ifelse(is.na(wealth_quintile_rurb), NA,
                  ifelse(wealth_quintile_rurb == 1, 1, 0)),
  wq_5_o = ifelse(is.na(wealth_quintile_rurb), NA,
                  ifelse(wealth_quintile_rurb == 5, 1, 0))
)

# number of quintile-1 and quintile-5 observations per district
Ana_n <- Ana_n %>%
  group_by(ex_d_name_ind) %>%
  mutate(
    count_wq_1_o = sum(wq_1_o, na.rm = TRUE),
    count_wq_5_o = sum(wq_5_o, na.rm = TRUE)
  )

## filter districts >=20 cases
Ana_n <- Ana_n %>%
  dplyr::filter(count_wq_1_o >= 20 & count_wq_5_o >= 20) %>%
  ungroup()

## check numbers: number of districts that remain
Ana_n %>% dplyr::summarise(n_districts = n_distinct(ex_d_name_ind))
#510 districts

# calculate mean asset index per district in wealth quintile 1 and 5 and the
# difference between the two
hwq1_df <- Ana_n %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(wealth_quintile_rurb == 1) %>%
  summarise(mean_hwq_1_districts = mean(asset_index_combined_s, na.rm = TRUE))
hwq5_df <- Ana_n %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(wealth_quintile_rurb == 5) %>%
  summarise(mean_hwq5_districts = mean(asset_index_combined_s, na.rm = TRUE))

analysis_wq_national <- left_join(Ana_n, hwq1_df, by = "ex_d_name_ind")
analysis_wq_national <- left_join(analysis_wq_national, hwq5_df, by = "ex_d_name_ind")
analysis_wq_national <- mutate(
  analysis_wq_national,
  diff_hwq5_hwq1 = mean_hwq5_districts - mean_hwq_1_districts
)


# one observation/district
analysis_wq_national <- analysis_wq_national %>%
  dplyr::select(ex_d_name_ind, ed_att_new, diff_hwq5_hwq1) %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  ungroup

# lm analysis
mod <- lm(diff_hwq5_hwq1 ~ ed_att_new, data = analysis_wq_national)
summary(mod)

# graph: one point per district plus the fitted regression line
fig <- analysis_wq_national %>%
  ggplot() +
  geom_jitter(mapping = aes(y = diff_hwq5_hwq1, x = ed_att_new), size = 0.3) +
  geom_smooth(
    mapping = aes(y = diff_hwq5_hwq1, x = ed_att_new),
    method = "lm", color = "gray48", se = FALSE, size = 0.7
  ) +
  theme_classic() +
  labs(
    x = "% of participants in a district who completed primary education",
    y = "Difference of mean wealth indices",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    #legend.text=element_text(size=10),
    #legend.title = element_text(size=8,face="bold"),
    #legend.key.size = unit(0.45,"cm"),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(breaks = c(1, 2, 3, 4), limits = c(0.5, 4.4)) +
  scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(15, 110)) +
  coord_fixed(ratio = 20 / 1, expand = FALSE)


print(fig)



#### national stratified by urban residence

# National analysis stratified by urban residence: wealth gap between
# wealth_quintile_rurb quintiles 5 and 1 per district x urban/rural cell.

# drop district/residence cells with <20 observations in wealth quintile 1 or 5
Ana_n_ru <- India_DLHS_AHS

# 0/1 indicators for membership in quintile 1 and quintile 5 (NA if unknown)
Ana_n_ru <- mutate(
  Ana_n_ru,
  wq_1_o = ifelse(is.na(wealth_quintile_rurb), NA,
                  ifelse(wealth_quintile_rurb == 1, 1, 0)),
  wq_5_o = ifelse(is.na(wealth_quintile_rurb), NA,
                  ifelse(wealth_quintile_rurb == 5, 1, 0))
)

# number of quintile-1 and quintile-5 observations per district and residence
Ana_n_ru <- Ana_n_ru %>%
  group_by(ex_d_name_ind, urban) %>%
  mutate(
    count_wq_1_o = sum(wq_1_o, na.rm = TRUE),
    count_wq_5_o = sum(wq_5_o, na.rm = TRUE)
  )

## filter cells >=20 cases
Ana_n_ru <- Ana_n_ru %>%
  dplyr::filter(count_wq_1_o >= 20 & count_wq_5_o >= 20) %>%
  ungroup()

## check numbers: districts remaining in each stratum
Ana_n_ru %>%
  dplyr::filter(urban == 1) %>%
  dplyr::summarise(n_districts = n_distinct(ex_d_name_ind))
#371

Ana_n_ru %>%
  dplyr::filter(urban == 0) %>%
  dplyr::summarise(n_districts = n_distinct(ex_d_name_ind))
#444

# calculate mean asset index per cell in wealth quintile 1 and 5 and the
# difference between the two
hwq1_df <- Ana_n_ru %>%
  group_by(ex_d_name_ind, urban) %>%
  dplyr::filter(wealth_quintile_rurb == 1) %>%
  summarise(mean_hwq_1_districts = mean(asset_index_combined_s, na.rm = TRUE))
hwq5_df <- Ana_n_ru %>%
  group_by(ex_d_name_ind, urban) %>%
  dplyr::filter(wealth_quintile_rurb == 5) %>%
  summarise(mean_hwq5_districts = mean(asset_index_combined_s, na.rm = TRUE))

analysis_wq_national_r_u <- left_join(Ana_n_ru, hwq1_df, by = c("ex_d_name_ind", "urban"))
analysis_wq_national_r_u <- left_join(analysis_wq_national_r_u, hwq5_df, by = c("ex_d_name_ind", "urban"))
analysis_wq_national_r_u <- mutate(
  analysis_wq_national_r_u,
  diff_hwq5_hwq1 = mean_hwq5_districts - mean_hwq_1_districts
)

# one observation per district and residence
analysis_wq_national_r_u <- analysis_wq_national_r_u %>%
  dplyr::select(ex_d_name_ind, ed_att_new, diff_hwq5_hwq1, urban, urban_lab) %>%
  group_by(ex_d_name_ind, urban) %>%
  dplyr::filter(row_number() == 1) %>%
  ungroup

# lm analysis, separately for urban and rural cells
# (inside filter() `urban` refers to the data column, not to the object
# of the same name created here)
urban <- dplyr::filter(analysis_wq_national_r_u, urban == 1)
mod <- lm(diff_hwq5_hwq1 ~ ed_att_new, data = urban)
summary(mod)

rural <- dplyr::filter(analysis_wq_national_r_u, urban == 0)
mod <- lm(diff_hwq5_hwq1 ~ ed_att_new, data = rural)
summary(mod)


# graph: one panel per urban_lab value
# NOTE(review): the labeller only relabels panels whose urban_lab value is
# exactly "urban" or "rural" -- confirm against the levels of urban_lab.
labels <- c(urban = "Urban", rural = "Rural")
fig <- analysis_wq_national_r_u %>%
  ggplot() +
  geom_jitter(mapping = aes(y = diff_hwq5_hwq1, x = ed_att_new), size = 0.3) +
  geom_smooth(
    mapping = aes(y = diff_hwq5_hwq1, x = ed_att_new),
    method = "lm", color = "gray48", se = FALSE, size = 0.7
  ) +
  theme_classic() +
  labs(
    x = "% of participants in a district who completed primary education",
    y = "Difference of mean wealth indices",
    fill = ""
  ) +
  facet_wrap(~urban_lab, labeller = labeller(urban_lab = labels)) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    #legend.text=element_text(size=10),
    #legend.title = element_text(size=8,face="bold"),
    #legend.key.size = unit(0.45,"cm"),
    panel.spacing = unit(2, "lines"),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.text.x = element_text(size = 20, face = "bold"),
    strip.text.y = element_text(size = 20, face = "bold"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(breaks = c(1, 2, 3, 4), limits = c(0.5, 4.4)) +
  scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(15, 110)) +
  coord_fixed(ratio = 32 / 1, expand = FALSE)

print(fig)





```
# District level regressions using wealth quintiles computed for each district: top vs bottom

###### Linear and logistic regressions
```{r filter districts with no contrasts}

# Build Ana1, the dataset for the per-district regressions: keep only the
# needed columns, then drop districts without enough urban/rural mix or
# without enough observations in the bottom and top wealth quintile.
Ana1 <- India_DLHS_AHS

Ana1 <- dplyr::select(
  Ana1,
  ex_d_name_ind,
  ex_diab_narrow_ind, ex_diab_narrow_ind_ahs_unfasted, ex_htn_narrow_ind,
  bmigrt27.5, csmoke,
  ex_diab_narrow_ind_dbl, ex_diab_narrow_ind_ahs_unfasted_dbl,
  ex_htn_narrow_ind_dbl, bmigrt27.5_dbl, csmoke_dbl,
  sex, age_grp, urban, urban_lab, hh_wealth_quintile_district,
  urban_prop, rural_prop, PCI_districts, literacy_rate_female,
  ed_att_new, medianai_r_u
)


# keep districts where both the urban and the rural share are at least 5%
Ana1 <- dplyr::filter(Ana1, urban_prop >= 5 & rural_prop >= 5)


# drop districts with <50 cases in low or high SES category


## test: how many district/quintile cells (quintile 1 or 5) have <50 rows?
test <- Ana1
test %>% group_by(ex_d_name_ind) %>% summarize(sum = n())
#517 districts
test <- Ana1 %>%
  group_by(ex_d_name_ind, hh_wealth_quintile_district) %>%
  summarize(sum = n())
test <- dplyr::filter(test, sum < 50)
dplyr::filter(test, hh_wealth_quintile_district == 1 | hh_wealth_quintile_district == 5) %>%
  summarize(sum = n())
#0 districts


# 0/1 indicators for membership in quintile 1 and quintile 5 (NA if unknown)
Ana1 <- mutate(
  Ana1,
  wq_1_o = ifelse(is.na(hh_wealth_quintile_district), NA,
                  ifelse(hh_wealth_quintile_district == 1, 1, 0)),
  wq_5_o = ifelse(is.na(hh_wealth_quintile_district), NA,
                  ifelse(hh_wealth_quintile_district == 5, 1, 0))
)

# number of quintile-1 and quintile-5 observations per district
Ana1 <- Ana1 %>%
  group_by(ex_d_name_ind) %>%
  mutate(
    count_wq_1_o = sum(wq_1_o, na.rm = TRUE),
    count_wq_5_o = sum(wq_5_o, na.rm = TRUE)
  )

## filter districts >=50 cases
Ana1 <- Ana1 %>%
  dplyr::filter(count_wq_1_o >= 50 & count_wq_5_o >= 50) %>%
  ungroup()

## check numbers: number of districts that remain
Ana1 %>% dplyr::summarise(n_districts = n_distinct(ex_d_name_ind))
#517 (517-0)



#drop districts with too few cases in lower & higher SES category/CVD risk factor/district


## Test if code is working: count, per risk factor, the districts with fewer
## than 10 cases in wealth quintile 5 and in wealth quintile 1.
## The number of rows each call prints is the number of such districts.
test <- Ana1
test %>% group_by(ex_d_name_ind) %>% summarize(sum = n())
#517 districts

# cases per district and wealth quintile for every risk factor
test <- test %>%
  group_by(ex_d_name_ind, hh_wealth_quintile_district) %>%
  summarize(
    diabetes_cases = sum(ex_diab_narrow_ind_dbl, na.rm = TRUE),
    hypertension_cases = sum(ex_htn_narrow_ind_dbl, na.rm = TRUE),
    obesity_cases = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases = sum(csmoke_dbl, na.rm = TRUE)
  )

# diabetes
test_d <- dplyr::filter(test, diabetes_cases < 10)
summarize(dplyr::filter(test_d, hh_wealth_quintile_district == 5), sum = n())
#86 districts
summarize(dplyr::filter(test_d, hh_wealth_quintile_district == 1), sum = n())
#140 districts

# hypertension
test_h <- dplyr::filter(test, hypertension_cases < 10)
summarize(dplyr::filter(test_h, hh_wealth_quintile_district == 5), sum = n())
#0 districts
summarize(dplyr::filter(test_h, hh_wealth_quintile_district == 1), sum = n())
#1 district

# obesity
test_o <- dplyr::filter(test, obesity_cases < 10)
summarize(dplyr::filter(test_o, hh_wealth_quintile_district == 5), sum = n())
#79 districts
summarize(dplyr::filter(test_o, hh_wealth_quintile_district == 1), sum = n())
#195 districts

# current smoking
test_c <- dplyr::filter(test, csmoke_cases < 10)
summarize(dplyr::filter(test_c, hh_wealth_quintile_district == 5), sum = n())
#34 districts
summarize(dplyr::filter(test_c, hh_wealth_quintile_district == 1), sum = n())
#13 districts






## Attach to every row of Ana1 the number of cases its district has in the
## higher (quintile 5) and lower (quintile 1) SES category. One analysis
## dataset is built per risk factor.

## count district cases of diabetes/hypertension/obesity/csmoke in higher SES category
case_list <- Ana1 %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(hh_wealth_quintile_district == 5) %>%
  summarize(
    diabetes_cases_5 = sum(ex_diab_narrow_ind_dbl, na.rm = TRUE),
    hypertension_cases_5 = sum(ex_htn_narrow_ind_dbl, na.rm = TRUE),
    obesity_cases_5 = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases_5 = sum(csmoke_dbl, na.rm = TRUE)
  )

## merge by districts

## diabetes
diabetes_list <- dplyr::select(case_list, ex_d_name_ind, diabetes_cases_5)
diabetes_analysis <- left_join(Ana1, diabetes_list, by = "ex_d_name_ind")

## hypertension
hypertension_list <- dplyr::select(case_list, ex_d_name_ind, hypertension_cases_5)
hypertension_analysis <- left_join(Ana1, hypertension_list, by = "ex_d_name_ind")

## obesity
obesity_list <- dplyr::select(case_list, ex_d_name_ind, obesity_cases_5)
obesity_analysis <- left_join(Ana1, obesity_list, by = "ex_d_name_ind")

## csmoke
csmoke_list <- dplyr::select(case_list, ex_d_name_ind, csmoke_cases_5)
csmoke_analysis <- left_join(Ana1, csmoke_list, by = "ex_d_name_ind")

# check numbers test (number of printed rows = number of districts)
summarize(dplyr::filter(group_by(diabetes_analysis, ex_d_name_ind), diabetes_cases_5 < 10), sum = n()) #86 districts
summarize(dplyr::filter(group_by(hypertension_analysis, ex_d_name_ind), hypertension_cases_5 < 10), sum = n()) #0 districts
summarize(dplyr::filter(group_by(obesity_analysis, ex_d_name_ind), obesity_cases_5 < 10), sum = n()) #79 districts
summarize(dplyr::filter(group_by(csmoke_analysis, ex_d_name_ind), csmoke_cases_5 < 10), sum = n())
#34 districts


## count district cases of diabetes/hypertension/obesity/csmoke in lower SES category
case_list <- Ana1 %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(hh_wealth_quintile_district == 1) %>%
  summarize(
    diabetes_cases_1 = sum(ex_diab_narrow_ind_dbl, na.rm = TRUE),
    hypertension_cases_1 = sum(ex_htn_narrow_ind_dbl, na.rm = TRUE),
    obesity_cases_1 = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases_1 = sum(csmoke_dbl, na.rm = TRUE)
  )

## merge by districts (added to the datasets that already carry the *_5 counts)

## diabetes
diabetes_list <- dplyr::select(case_list, ex_d_name_ind, diabetes_cases_1)
diabetes_analysis <- left_join(diabetes_analysis, diabetes_list, by = "ex_d_name_ind")

## hypertension
hypertension_list <- dplyr::select(case_list, ex_d_name_ind, hypertension_cases_1)
hypertension_analysis <- left_join(hypertension_analysis, hypertension_list, by = "ex_d_name_ind")

## obesity
obesity_list <- dplyr::select(case_list, ex_d_name_ind, obesity_cases_1)
obesity_analysis <- left_join(obesity_analysis, obesity_list, by = "ex_d_name_ind")

## csmoke
csmoke_list <- dplyr::select(case_list, ex_d_name_ind, csmoke_cases_1)
csmoke_analysis <- left_join(csmoke_analysis, csmoke_list, by = "ex_d_name_ind")


# check numbers test (number of printed rows = number of districts)
summarize(dplyr::filter(group_by(diabetes_analysis, ex_d_name_ind), diabetes_cases_1 < 10), sum = n()) #140 districts
summarize(dplyr::filter(group_by(hypertension_analysis, ex_d_name_ind), hypertension_cases_1 < 10), sum = n()) #1 district
summarize(dplyr::filter(group_by(obesity_analysis, ex_d_name_ind), obesity_cases_1 < 10), sum = n()) #195 districts
summarize(dplyr::filter(group_by(csmoke_analysis, ex_d_name_ind), csmoke_cases_1 < 10), sum = n())
#13 districts


# For each CVD risk factor: add up the district's cases in the higher and
# lower SES category and keep only districts where that sum is at least 20.

## diabetes
diabetes_analysis <- diabetes_analysis %>%
  mutate(sum_cases = diabetes_cases_1 + diabetes_cases_5) %>%
  dplyr::filter(sum_cases >= 20)


## hypertension
hypertension_analysis <- hypertension_analysis %>%
  mutate(sum_cases = hypertension_cases_1 + hypertension_cases_5) %>%
  dplyr::filter(sum_cases >= 20)


## obesity
obesity_analysis <- obesity_analysis %>%
  mutate(sum_cases = obesity_cases_1 + obesity_cases_5) %>%
  dplyr::filter(sum_cases >= 20)

## csmoke
csmoke_analysis <- csmoke_analysis %>%
  mutate(sum_cases = csmoke_cases_1 + csmoke_cases_5) %>%
  dplyr::filter(sum_cases >= 20)



## check number of districts that have not been dropped
## (one row is printed per remaining district)
dplyr::select(diabetes_analysis, ex_d_name_ind) %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(n = n()) #408->517-408= 109 rows removed from graphs
dplyr::select(hypertension_analysis, ex_d_name_ind) %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(n = n()) #517->517-517= 0 rows removed from graphs
dplyr::select(obesity_analysis, ex_d_name_ind) %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(n = n()) #413->517-413= 104 rows removed from graphs
dplyr::select(csmoke_analysis, ex_d_name_ind) %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(n = n()) #503->517-503= 14 rows removed from graphs





```


```{r Regression analysis district level glm}


# Per-district logistic regressions (bayesglm) of each CVD risk factor on
# age group, residence, sex and district wealth quintile. From every district
# model the coefficient of wealth quintile 5 is extracted and merged into
# Ana1 as <outcome>_f_coeff_log.
#
# The coefficients are extracted with an explicit per-district
# do(tidy(...)) instead of calling tidy() on the rowwise result of do():
# the rowwise-data-frame tidiers were deprecated in broom and are not
# available in current releases, while this form works in old and new versions.

#####diabetes

# logistic regression grouped by district (one model per district, stored in
# the list column `mod`)
analysis <- diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    ex_diab_narrow_ind ~ age_grp + urban + sex + hh_wealth_quintile_district,
    family = binomial(link = "logit"), data = .
  ))

# extract information with broom: one tidy coefficient table per district
coeffs_diab_f <- analysis %>%
  ungroup() %>%
  group_by(ex_d_name_ind) %>%
  do(tidy(.$mod[[1]]))

# filter wealth factor 5 and merge
coeffs_w5 <- dplyr::filter(coeffs_diab_f, term == "hh_wealth_quintile_district5")
coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)
coeffs_w5_only <- dplyr::rename(coeffs_w5_only, diab_f_coeff_log = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")



#####hypertension

# logistic regression grouped by district
analysis <- hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    ex_htn_narrow_ind ~ age_grp + urban + sex + hh_wealth_quintile_district,
    family = binomial(link = "logit"), data = .
  ))

# extract information with broom
coeffs_htn_f <- analysis %>%
  ungroup() %>%
  group_by(ex_d_name_ind) %>%
  do(tidy(.$mod[[1]]))

# filter wealth factor 5 and merge
coeffs_w5 <- dplyr::filter(coeffs_htn_f, term == "hh_wealth_quintile_district5")
coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)
coeffs_w5_only <- dplyr::rename(coeffs_w5_only, htn_f_coeff_log = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")



#####obesity

# logistic regression grouped by district
analysis <- obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    bmigrt27.5 ~ age_grp + urban + sex + hh_wealth_quintile_district,
    family = binomial(link = "logit"), data = .
  ))

# extract information with broom
coeffs_bmi_f <- analysis %>%
  ungroup() %>%
  group_by(ex_d_name_ind) %>%
  do(tidy(.$mod[[1]]))

# filter wealth factor 5 and merge
coeffs_w5 <- dplyr::filter(coeffs_bmi_f, term == "hh_wealth_quintile_district5")
coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)
coeffs_w5_only <- dplyr::rename(coeffs_w5_only, bmi_f_coeff_log = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


#####csmoke

# logistic regression grouped by district
analysis <- csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    csmoke ~ age_grp + urban + sex + hh_wealth_quintile_district,
    family = binomial(link = "logit"), data = .
  ))

# extract information with broom
coeffs_csmoke_f <- analysis %>%
  ungroup() %>%
  group_by(ex_d_name_ind) %>%
  do(tidy(.$mod[[1]]))

# filter wealth factor 5 and merge
coeffs_w5 <- dplyr::filter(coeffs_csmoke_f, term == "hh_wealth_quintile_district5")
coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)
coeffs_w5_only <- dplyr::rename(coeffs_w5_only, csmoke_f_coeff_log = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


```

```{r Regression analysis district level lm}


# Per-district linear regressions (lm) of each CVD risk factor indicator
# (*_dbl) on age group, residence, sex and district wealth quintile. From
# every district model the coefficient of wealth quintile 5 is extracted and
# merged into Ana1 as <outcome>_f_coeff.
#
# The coefficients are extracted with an explicit per-district
# do(tidy(...)) instead of calling tidy() on the rowwise result of do():
# the rowwise-data-frame tidiers were deprecated in broom and are not
# available in current releases, while this form works in old and new versions.

#####diabetes

# linear regression grouped by district (one model per district, stored in
# the list column `mod`)
analysis <- diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    ex_diab_narrow_ind_dbl ~ age_grp + sex + urban + hh_wealth_quintile_district,
    data = .
  ))

# extract information with broom: one tidy coefficient table per district
coeffs_diab_f <- analysis %>%
  ungroup() %>%
  group_by(ex_d_name_ind) %>%
  do(tidy(.$mod[[1]]))

# filter wealth factor 5 and merge
coeffs_w5 <- dplyr::filter(coeffs_diab_f, term == "hh_wealth_quintile_district5")
coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)
coeffs_w5_only <- dplyr::rename(coeffs_w5_only, diab_f_coeff = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")




#####hypertension

# linear regression grouped by district
analysis <- hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    ex_htn_narrow_ind_dbl ~ age_grp + urban + sex + hh_wealth_quintile_district,
    data = .
  ))

# extract information with broom
coeffs_htn_f <- analysis %>%
  ungroup() %>%
  group_by(ex_d_name_ind) %>%
  do(tidy(.$mod[[1]]))

# filter wealth factor 5 and merge
coeffs_w5 <- dplyr::filter(coeffs_htn_f, term == "hh_wealth_quintile_district5")
coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)
coeffs_w5_only <- dplyr::rename(coeffs_w5_only, htn_f_coeff = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


######obesity

# linear regression grouped by district
analysis <- obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    bmigrt27.5_dbl ~ age_grp + urban + sex + hh_wealth_quintile_district,
    data = .
  ))

# extract information with broom
coeffs_bmi_f <- analysis %>%
  ungroup() %>%
  group_by(ex_d_name_ind) %>%
  do(tidy(.$mod[[1]]))

# filter wealth factor 5 and merge
coeffs_w5 <- dplyr::filter(coeffs_bmi_f, term == "hh_wealth_quintile_district5")
coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)
coeffs_w5_only <- dplyr::rename(coeffs_w5_only, bmi_f_coeff = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")



######csmoke

# linear regression grouped by district
analysis <- csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    csmoke_dbl ~ age_grp + urban + sex + hh_wealth_quintile_district,
    data = .
  ))

# extract information with broom
coeffs_csmoke_f <- analysis %>%
  ungroup() %>%
  group_by(ex_d_name_ind) %>%
  do(tidy(.$mod[[1]]))

# filter wealth factor 5 and merge
coeffs_w5 <- dplyr::filter(coeffs_csmoke_f, term == "hh_wealth_quintile_district5")
coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)
coeffs_w5_only <- dplyr::rename(coeffs_w5_only, csmoke_f_coeff = estimate)

Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")




```









```{r graphs: filter: one value/district }

# Reduce the analysis dataset to one row per district (Ana2) and one row per
# district and residence (Ana2_urban_rural) for the graphs.

# Ana1_DLHS_w is not assigned anywhere in the visible part of this file (the
# assignment was commented out), so the chunk failed in a fresh session.
# Create it from Ana1 unless an object of that name already exists (e.g. one
# restored from a saved workspace).
if (!exists("Ana1_DLHS_w")) {
  Ana1_DLHS_w <- Ana1
}

# only 1 value per district (for rural/urban 2)
# The grouping column(s) are kept by select() even though they are not listed.
Ana2 <- Ana1_DLHS_w %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  dplyr::select(
    diab_f_coeff_log, htn_f_coeff_log, bmi_f_coeff_log, csmoke_f_coeff_log,
    diab_f_coeff, htn_f_coeff, bmi_f_coeff, csmoke_f_coeff,
    ed_att_new, PCI_districts, literacy_rate_female, urban_prop
  ) %>%
  ungroup

Ana2_urban_rural <- Ana1_DLHS_w %>%
  group_by(ex_d_name_ind, urban_lab) %>%
  dplyr::filter(row_number() == 1) %>%
  dplyr::select(
    diab_f_coeff_log, htn_f_coeff_log, bmi_f_coeff_log, csmoke_f_coeff_log,
    diab_f_coeff, htn_f_coeff, bmi_f_coeff, csmoke_f_coeff,
    medianai_r_u, urban_lab, urban
  ) %>%
  ungroup


```
#### graphs 
```{r graphs:educational attainment}

# Scatter plot (jittered points plus a fitted straight line) of one
# district-level coefficient against ed_att_new.
#
#   data        data frame with a column `ed_att_new` and the column named
#               by `coeff_var` (here: Ana2)
#   coeff_var   name of the coefficient column, as a string
#   odds_ratio  FALSE: y = coefficient * 100, axis titled
#                      "Difference in percentage points"
#               TRUE:  y = coefficient as stored; the ticks sit at
#                      log(0.05), log(0.2), 0, log(5), log(20) and are
#                      labelled with the corresponding odds ratios
#
# Returns the ggplot object; nothing is drawn until it is printed.
plot_coeff_vs_education <- function(data, coeff_var, odds_ratio = FALSE) {
  stopifnot(coeff_var %in% names(data), "ed_att_new" %in% names(data))

  coeff <- data[[coeff_var]]
  plot_data <- data.frame(
    x = data[["ed_att_new"]],
    y = if (odds_ratio) coeff else coeff * 100
  )

  if (odds_ratio) {
    y_title <- "Odds Ratio"
    y_scale <- scale_y_continuous(
      breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
      limits = c(-5.3, 5.3),
      labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1",
                 "1.609" = "5", "2.996" = "20")
    )
    aspect <- 15 / 2
  } else {
    y_title <- "Difference in percentage points"
    y_scale <- scale_y_continuous(
      breaks = c(-20, -10, 0, 10, 20),
      limits = c(-33, 33)
    )
    aspect <- 12 / 10
  }

  ggplot(plot_data) +
    geom_jitter(mapping = aes(y = y, x = x), size = 0.3) +
    geom_smooth(mapping = aes(y = y, x = x), method = "lm",
                color = "gray48", se = FALSE, size = 0.7) +
    theme_classic() +
    labs(x = "% of participants in a district who completed primary education",
         y = y_title,
         fill = "") +
    theme(axis.text = element_text(size = 22),
          axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
          axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
          #legend.text=element_text(size=10),
          #legend.title = element_text(size=8,face="bold"),
          #legend.key.size = unit(0.45,"cm"),
          plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
          strip.background = element_blank()) +
    y_scale +
    scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(15, 110)) +
    coord_fixed(ratio = aspect, expand = FALSE)
}

# For every outcome: draw the plot, then fit and summarise the straight-line
# model of the same y variable on ed_attnew_placeholder


```












# District level regressions using educational attainment (bottom category vs top two categories)

###### linear and logistic regressions

```{r Regression analysis filter districts with no contrasts}

# Working copy of the appended DLHS/AHS data, reduced to the outcome,
# covariate and district-level columns used in this section, and restricted
# to rows where both urban_prop and rural_prop are at least 5.
Ana1 <- India_DLHS_AHS %>%
  dplyr::select(
    ex_d_name_ind,
    ex_diab_narrow_ind, ex_diab_narrow_ind_ahs_unfasted, ex_htn_narrow_ind,
    bmigrt27.5, csmoke,
    ex_diab_narrow_ind_dbl, ex_diab_narrow_ind_ahs_unfasted_dbl,
    ex_htn_narrow_ind_dbl, bmigrt27.5_dbl, csmoke_dbl,
    sex, age_grp, urban, urban_lab, ed_5, ed_4,
    urban_prop, rural_prop, PCI_districts, literacy_rate_female,
    ed_att_new, medianai_r_u
  ) %>%
  dplyr::filter(urban_prop >= 5, rural_prop >= 5)

#drop districts with <50 cases in low or high SES category


## test: which district x ed_4 cells have fewer than 50 rows?
test <- Ana1

# one output row per district, so the row count is the number of districts
test %>%
  group_by(ex_d_name_ind) %>%
  summarize(sum = n())
# 517 districts

# rows per district x ed_4 cell, keeping only the cells with fewer than 50 rows
test <- Ana1 %>%
  group_by(ex_d_name_ind, ed_4) %>%
  summarize(sum = n()) %>%
  dplyr::filter(sum < 50)

# of those, the cells where ed_4 is 1 or 5
test %>%
  dplyr::filter(ed_4 == 1 | ed_4 == 5) %>%
  summarize(sum = n())
# 1 districts



# Flag rows with ed_4 == 1 (ed_1_o) and ed_4 == 5 (ed_5_o); NA where ed_4 is
# missing. Then count both flags within each district and keep only the
# districts that have at least 50 rows of each kind.
Ana1 <- Ana1 %>%
  mutate(
    ed_1_o = ifelse(is.na(ed_4), NA, ifelse(ed_4 == 1, 1, 0)),
    ed_5_o = ifelse(is.na(ed_4), NA, ifelse(ed_4 == 5, 1, 0))
  ) %>%
  group_by(ex_d_name_ind) %>%
  mutate(
    count_ed_1_o = sum(ed_1_o, na.rm = TRUE),
    count_ed_5_o = sum(ed_5_o, na.rm = TRUE)
  ) %>%
  ## filter districts >=50 cases
  dplyr::filter(count_ed_1_o >= 50, count_ed_5_o >= 50) %>%
  ungroup()

## check numbers: one output row per remaining district
Ana1 %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(sum = n())
# 516 (517-1)



#drop districts with too few cases in lower & higher SES category/CVD risk factor/district


## test if code is working
test <- Ana1
test %>%
  group_by(ex_d_name_ind) %>%
  summarize(sum = n())
# 516 districts

# sum of each *_dbl outcome indicator per district x ed_4 cell
test <- test %>%
  group_by(ex_d_name_ind, ed_4) %>%
  summarize(
    diabetes_cases = sum(ex_diab_narrow_ind_dbl, na.rm = TRUE),
    hypertension_cases = sum(ex_htn_narrow_ind_dbl, na.rm = TRUE),
    obesity_cases = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases = sum(csmoke_dbl, na.rm = TRUE)
  )

# For each outcome: cells with fewer than 10 cases, listed separately for
# ed_4 == 5 and ed_4 == 1 (one output row per district).

# diabetes
test_d <- test %>% dplyr::filter(diabetes_cases < 10)
test_d %>%
  dplyr::filter(ed_4 == 5) %>%
  summarize(sum = n())
# 145 districts
test_d %>%
  dplyr::filter(ed_4 == 1) %>%
  summarize(sum = n())
# 30

# hypertension
test_h <- test %>% dplyr::filter(hypertension_cases < 10)
test_h %>%
  dplyr::filter(ed_4 == 5) %>%
  summarize(sum = n())
# 3 districts
test_h %>%
  dplyr::filter(ed_4 == 1) %>%
  summarize(sum = n())
# 0 districts

# obesity
test_o <- test %>% dplyr::filter(obesity_cases < 10)
test_o %>%
  dplyr::filter(ed_4 == 5) %>%
  summarize(sum = n())
# 98 districts
test_o %>%
  dplyr::filter(ed_4 == 1) %>%
  summarize(sum = n())
# 52 districts

# current smoking
test_c <- test %>% dplyr::filter(csmoke_cases < 10)
test_c %>%
  dplyr::filter(ed_4 == 5) %>%
  summarize(sum = n())
# 121 districts
test_c %>%
  dplyr::filter(ed_4 == 1) %>%
  summarize(sum = n())
# 5 districts






## count district cases of diabetes/hypertension/obesity/csmoke among rows
## with ed_4 == 5 (the higher SES category)
case_list <- Ana1 %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(ed_4 == 5) %>%
  summarize(
    diabetes_cases_5 = sum(ex_diab_narrow_ind_dbl, na.rm = TRUE),
    hypertension_cases_5 = sum(ex_htn_narrow_ind_dbl, na.rm = TRUE),
    obesity_cases_5 = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases_5 = sum(csmoke_dbl, na.rm = TRUE)
  )

## merge by districts: one copy of Ana1 per outcome, each carrying that
## outcome's district-level count

## diabetes
diabetes_list <- case_list %>% dplyr::select(ex_d_name_ind, diabetes_cases_5)
diabetes_analysis <- Ana1 %>% left_join(diabetes_list, by = "ex_d_name_ind")

## hypertension
hypertension_list <- case_list %>% dplyr::select(ex_d_name_ind, hypertension_cases_5)
hypertension_analysis <- Ana1 %>% left_join(hypertension_list, by = "ex_d_name_ind")

## obesity
obesity_list <- case_list %>% dplyr::select(ex_d_name_ind, obesity_cases_5)
obesity_analysis <- Ana1 %>% left_join(obesity_list, by = "ex_d_name_ind")

## csmoke
csmoke_list <- case_list %>% dplyr::select(ex_d_name_ind, csmoke_cases_5)
csmoke_analysis <- Ana1 %>% left_join(csmoke_list, by = "ex_d_name_ind")

# check numbers test: districts with fewer than 10 cases (one row per district)
diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(diabetes_cases_5 < 10) %>%
  summarize(sum = n())
# 145 districts
hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(hypertension_cases_5 < 10) %>%
  summarize(sum = n())
# 3 districts
obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(obesity_cases_5 < 10) %>%
  summarize(sum = n())
# 98 districts
csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(csmoke_cases_5 < 10) %>%
  summarize(sum = n())
# 121 districts


## count district cases of diabetes/hypertension/obesity/csmoke among rows
## with ed_4 == 1 (the lower SES category)
case_list <- Ana1 %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(ed_4 == 1) %>%
  summarize(
    diabetes_cases_1 = sum(ex_diab_narrow_ind_dbl, na.rm = TRUE),
    hypertension_cases_1 = sum(ex_htn_narrow_ind_dbl, na.rm = TRUE),
    obesity_cases_1 = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases_1 = sum(csmoke_dbl, na.rm = TRUE)
  )

## merge by districts: add the count to the matching per-outcome data set

## diabetes
diabetes_list <- case_list %>% dplyr::select(ex_d_name_ind, diabetes_cases_1)
diabetes_analysis <- diabetes_analysis %>%
  left_join(diabetes_list, by = "ex_d_name_ind")

## hypertension
hypertension_list <- case_list %>% dplyr::select(ex_d_name_ind, hypertension_cases_1)
hypertension_analysis <- hypertension_analysis %>%
  left_join(hypertension_list, by = "ex_d_name_ind")

## obesity
obesity_list <- case_list %>% dplyr::select(ex_d_name_ind, obesity_cases_1)
obesity_analysis <- obesity_analysis %>%
  left_join(obesity_list, by = "ex_d_name_ind")

## csmoke
csmoke_list <- case_list %>% dplyr::select(ex_d_name_ind, csmoke_cases_1)
csmoke_analysis <- csmoke_analysis %>%
  left_join(csmoke_list, by = "ex_d_name_ind")

# check numbers test: districts with fewer than 10 cases (one row per district)
diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(diabetes_cases_1 < 10) %>%
  summarize(sum = n())
# 30 districts
hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(hypertension_cases_1 < 10) %>%
  summarize(sum = n())
# 0 districts
obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(obesity_cases_1 < 10) %>%
  summarize(sum = n())
# 52 districts
csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(csmoke_cases_1 < 10) %>%
  summarize(sum = n())
# 5 districts


# For each CVD risk factor: add up the district's cases in the lower (ed_4 == 1)
# and higher (ed_4 == 5) SES category and keep only districts where that sum is
# at least 20. Rows whose sum is NA are dropped by the filter as well.

## diabetes
diabetes_analysis <- diabetes_analysis %>%
  mutate(sum_cases = diabetes_cases_1 + diabetes_cases_5) %>%
  dplyr::filter(sum_cases >= 20)

## hypertension
hypertension_analysis <- hypertension_analysis %>%
  mutate(sum_cases = hypertension_cases_1 + hypertension_cases_5) %>%
  dplyr::filter(sum_cases >= 20)

## obesity
obesity_analysis <- obesity_analysis %>%
  mutate(sum_cases = obesity_cases_1 + obesity_cases_5) %>%
  dplyr::filter(sum_cases >= 20)

## csmoke
csmoke_analysis <- csmoke_analysis %>%
  mutate(sum_cases = csmoke_cases_1 + csmoke_cases_5) %>%
  dplyr::filter(sum_cases >= 20)



## check number of districts that have not been dropped
## (each call returns one row per remaining district)

diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(n = n())
# 469 districts -> 516-469=47 rows removed from graph
# pci: 516-123=393 districts

hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(n = n())
# 516 districts -> 516-516=0 rows removed from graph
# pci: 516-80=436 districts

obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(n = n())
# 443 districts -> 516-443=73 rows removed from graph
# pci: 516-140=376 districts

csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(n = n())
# 508 districts -> 516-508=8 rows removed from graph
# pci: 516-87=429 districts

```



```{r Regression analysis district level glm}


##### diabetes

# One bayesglm logistic model per district, stored in the list column `mod`.
# NOTE(review): tidy() on the row-wise result of do() depends on the installed
# broom version supporting it -- confirm before upgrading packages.
analysis <- diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    ex_diab_narrow_ind ~ age_grp + sex + urban + ed_4,
    family = binomial(link = "logit"),
    data = .
  ))

# coefficient table for every district model
coeffs_diab_f <- tidy(analysis, mod)

# keep the coefficient of the term "ed_45" (ed_4, level 5) ...
coeffs_w5 <- coeffs_diab_f %>% dplyr::filter(term == "ed_45")

# ... as one estimate per district, named for the outcome
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  rename(diab_f_coeff_log = estimate)

# attach it to every row of the district in Ana1
Ana1 <- Ana1 %>% dplyr::full_join(coeffs_w5_only, by = "ex_d_name_ind")




##### hypertension

# One bayesglm logistic model per district, stored in the list column `mod`.
# NOTE(review): tidy() on the row-wise result of do() depends on the installed
# broom version supporting it -- confirm before upgrading packages.
analysis <- hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    ex_htn_narrow_ind ~ age_grp + urban + sex + ed_4,
    family = binomial(link = "logit"),
    data = .
  ))

# coefficient table for every district model
coeffs_htn_f <- tidy(analysis, mod)

# keep the coefficient of the term "ed_45" (ed_4, level 5) ...
coeffs_w5 <- coeffs_htn_f %>% dplyr::filter(term == "ed_45")

# ... as one estimate per district, named for the outcome
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  rename(htn_f_coeff_log = estimate)

# attach it to every row of the district in Ana1
Ana1 <- Ana1 %>% dplyr::full_join(coeffs_w5_only, by = "ex_d_name_ind")



###### obesity

# One bayesglm logistic model per district, stored in the list column `mod`.
# NOTE(review): tidy() on the row-wise result of do() depends on the installed
# broom version supporting it -- confirm before upgrading packages.
analysis <- obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    bmigrt27.5 ~ age_grp + urban + sex + ed_4,
    family = binomial(link = "logit"),
    data = .
  ))

# coefficient table for every district model
coeffs_bmi_f <- tidy(analysis, mod)

# keep the coefficient of the term "ed_45" (ed_4, level 5) ...
coeffs_w5 <- coeffs_bmi_f %>% dplyr::filter(term == "ed_45")

# ... as one estimate per district, named for the outcome
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  rename(bmi_f_coeff_log = estimate)

# attach it to every row of the district in Ana1
Ana1 <- Ana1 %>% dplyr::full_join(coeffs_w5_only, by = "ex_d_name_ind")





###### csmoke

# One bayesglm logistic model per district, stored in the list column `mod`.
# NOTE(review): tidy() on the row-wise result of do() depends on the installed
# broom version supporting it -- confirm before upgrading packages.
analysis <- csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    csmoke ~ age_grp + urban + sex + ed_4,
    family = binomial(link = "logit"),
    data = .
  ))

# coefficient table for every district model
coeffs_csmoke_f <- tidy(analysis, mod)

# keep the coefficient of the term "ed_45" (ed_4, level 5) ...
coeffs_w5 <- coeffs_csmoke_f %>% dplyr::filter(term == "ed_45")

# ... as one estimate per district, named for the outcome
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  rename(csmoke_f_coeff_log = estimate)

# attach it to every row of the district in Ana1
Ana1 <- Ana1 %>% dplyr::full_join(coeffs_w5_only, by = "ex_d_name_ind")




```

```{r Regression analysis district level lm}


##### diabetes

# One linear model per district on the numeric (_dbl) outcome, stored in the
# list column `mod`.
# NOTE(review): tidy() on the row-wise result of do() depends on the installed
# broom version supporting it -- confirm before upgrading packages.
analysis <- diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    ex_diab_narrow_ind_dbl ~ age_grp + sex + urban + ed_4,
    data = .
  ))

# coefficient table for every district model
coeffs_diab_f <- tidy(analysis, mod)

# keep the coefficient of the term "ed_45" (ed_4, level 5) ...
coeffs_w5 <- coeffs_diab_f %>% dplyr::filter(term == "ed_45")

# ... as one estimate per district, named for the outcome
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  rename(diab_f_coeff = estimate)

# attach it to every row of the district in Ana1
Ana1 <- Ana1 %>% dplyr::full_join(coeffs_w5_only, by = "ex_d_name_ind")



##### hypertension

# One linear model per district on the numeric (_dbl) outcome, stored in the
# list column `mod`.
# NOTE(review): tidy() on the row-wise result of do() depends on the installed
# broom version supporting it -- confirm before upgrading packages.
analysis <- hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    ex_htn_narrow_ind_dbl ~ age_grp + urban + sex + ed_4,
    data = .
  ))

# coefficient table for every district model
coeffs_htn_f <- tidy(analysis, mod)

# keep the coefficient of the term "ed_45" (ed_4, level 5) ...
coeffs_w5 <- coeffs_htn_f %>% dplyr::filter(term == "ed_45")

# ... as one estimate per district, named for the outcome
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  rename(htn_f_coeff = estimate)

# attach it to every row of the district in Ana1
Ana1 <- Ana1 %>% dplyr::full_join(coeffs_w5_only, by = "ex_d_name_ind")



###### obesity

# One linear model per district on the numeric (_dbl) outcome, stored in the
# list column `mod`.
# NOTE(review): tidy() on the row-wise result of do() depends on the installed
# broom version supporting it -- confirm before upgrading packages.
analysis <- obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    bmigrt27.5_dbl ~ age_grp + urban + sex + ed_4,
    data = .
  ))

# coefficient table for every district model
coeffs_bmi_f <- tidy(analysis, mod)

# keep the coefficient of the term "ed_45" (ed_4, level 5) ...
coeffs_w5 <- coeffs_bmi_f %>% dplyr::filter(term == "ed_45")

# ... as one estimate per district, named for the outcome
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  rename(bmi_f_coeff = estimate)

# attach it to every row of the district in Ana1
Ana1 <- Ana1 %>% dplyr::full_join(coeffs_w5_only, by = "ex_d_name_ind")



###### csmoke

# One linear model per district on the numeric (_dbl) outcome, stored in the
# list column `mod`.
# NOTE(review): tidy() on the row-wise result of do() depends on the installed
# broom version supporting it -- confirm before upgrading packages.
analysis <- csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    csmoke_dbl ~ age_grp + urban + sex + ed_4,
    data = .
  ))

# coefficient table for every district model
coeffs_csmoke_f <- tidy(analysis, mod)

# keep the coefficient of the term "ed_45" (ed_4, level 5) ...
coeffs_w5 <- coeffs_csmoke_f %>% dplyr::filter(term == "ed_45")

# ... as one estimate per district, named for the outcome
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  rename(csmoke_f_coeff = estimate)

# attach it to every row of the district in Ana1
Ana1 <- Ana1 %>% dplyr::full_join(coeffs_w5_only, by = "ex_d_name_ind")




```







```{r graphs: filter:one/value district }

# Ana1_DLHS_ed <- Ana1
# NOTE(review): Ana1_DLHS_ed is only assigned in the commented-out line above,
# so it must already exist in the session when this chunk runs -- confirm
# where it comes from.

## One row per district: the first row of every ex_d_name_ind group, reduced to
## the district-level coefficients and covariates used in the graphs.
Ana2 <- Ana1_DLHS_ed %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  dplyr::select(
    diab_f_coeff_log, htn_f_coeff_log, bmi_f_coeff_log, csmoke_f_coeff_log,
    diab_f_coeff, htn_f_coeff, bmi_f_coeff, csmoke_f_coeff,
    ed_att_new, PCI_districts, literacy_rate_female, urban_prop
  ) %>%
  ungroup()

## One row per district and urban_lab value (first row of each such group).
Ana2_urban_rural <- Ana1_DLHS_ed %>%
  group_by(ex_d_name_ind, urban_lab) %>%
  dplyr::filter(row_number() == 1) %>%
  dplyr::select(
    diab_f_coeff_log, htn_f_coeff_log, bmi_f_coeff_log, csmoke_f_coeff_log,
    diab_f_coeff, htn_f_coeff, bmi_f_coeff, csmoke_f_coeff,
    medianai_r_u, urban_lab, urban
  ) %>%
  ungroup()


```

###### graphs
```{r graphs:educational attainment education SES}

# The chunk label differs from the earlier "graphs:educational attainment"
# chunk on purpose: knitr stops with an error when two chunks share a label.

# Scatter plot (jittered points plus a fitted straight line) of one
# district-level coefficient against ed_att_new.
#
#   data        data frame with a column `ed_att_new` and the column named
#               by `coeff_var` (here: Ana2)
#   coeff_var   name of the coefficient column, as a string
#   odds_ratio  FALSE: y = coefficient * 100, axis titled
#                      "Difference in percentage points"
#               TRUE:  y = coefficient as stored; the ticks sit at
#                      log(0.05), log(0.2), 0, log(5), log(20) and are
#                      labelled with the corresponding odds ratios
#
# Returns the ggplot object; nothing is drawn until it is printed.
plot_coeff_vs_education <- function(data, coeff_var, odds_ratio = FALSE) {
  stopifnot(coeff_var %in% names(data), "ed_att_new" %in% names(data))

  coeff <- data[[coeff_var]]
  plot_data <- data.frame(
    x = data[["ed_att_new"]],
    y = if (odds_ratio) coeff else coeff * 100
  )

  if (odds_ratio) {
    y_title <- "Odds Ratio"
    y_scale <- scale_y_continuous(
      breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
      limits = c(-5.3, 5.3),
      labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1",
                 "1.609" = "5", "2.996" = "20")
    )
    aspect <- 15 / 2
  } else {
    y_title <- "Difference in percentage points"
    y_scale <- scale_y_continuous(
      breaks = c(-20, -10, 0, 10, 20),
      limits = c(-33, 33)
    )
    aspect <- 12 / 10
  }

  ggplot(plot_data) +
    geom_jitter(mapping = aes(y = y, x = x), size = 0.3) +
    geom_smooth(mapping = aes(y = y, x = x), method = "lm",
                color = "gray48", se = FALSE, size = 0.7) +
    theme_classic() +
    labs(x = "% of participants in a district who completed primary education",
         y = y_title,
         fill = "") +
    theme(axis.text = element_text(size = 22),
          axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
          axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
          #legend.text=element_text(size=10),
          #legend.title = element_text(size=8,face="bold"),
          #legend.key.size = unit(0.45,"cm"),
          plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
          strip.background = element_blank()) +
    y_scale +
    scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(15, 110)) +
    coord_fixed(ratio = aspect, expand = FALSE)
}

# For every outcome: draw the plot, then fit and summarise the straight-line
# model of the same y variable on ed_att_new.

##Diabetes

#absolute difference
fig <- plot_coeff_vs_education(Ana2, "diab_f_coeff")
print(fig)

# The response is multiplied by 100 so the slope is in percentage points, the
# unit shown on the plot and used by every other absolute-difference model
# here (this one model previously used the unscaled coefficient).
mod <- lm((diab_f_coeff*100) ~ ed_att_new, data = Ana2)
summary(mod)

#odds ratio
fig <- plot_coeff_vs_education(Ana2, "diab_f_coeff_log", odds_ratio = TRUE)
print(fig)

mod <- lm((diab_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


##hypertension

#absolute difference
fig <- plot_coeff_vs_education(Ana2, "htn_f_coeff")
print(fig)

mod <- lm((htn_f_coeff*100) ~ ed_att_new, data = Ana2)
summary(mod)

#odds ratio
fig <- plot_coeff_vs_education(Ana2, "htn_f_coeff_log", odds_ratio = TRUE)
print(fig)

mod <- lm((htn_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


##bmi

#absolute difference
fig <- plot_coeff_vs_education(Ana2, "bmi_f_coeff")
print(fig)

mod <- lm((bmi_f_coeff*100) ~ ed_att_new, data = Ana2)
summary(mod)

#odds ratio
fig <- plot_coeff_vs_education(Ana2, "bmi_f_coeff_log", odds_ratio = TRUE)
print(fig)

mod <- lm((bmi_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


##csmoke

#absolute difference
fig <- plot_coeff_vs_education(Ana2, "csmoke_f_coeff")
print(fig)

mod <- lm((csmoke_f_coeff*100) ~ ed_att_new, data = Ana2)
summary(mod)

#odds ratio
fig <- plot_coeff_vs_education(Ana2, "csmoke_f_coeff_log", odds_ratio = TRUE)
print(fig)

mod <- lm((csmoke_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


```

```{r graphs:PCI_districts }


## Diabetes

# absolute difference: coefficient * 100 against PCI_districts, jittered
# points plus a fitted straight line
fig <- ggplot(Ana2, aes(x = PCI_districts, y = diab_f_coeff * 100)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
  theme_classic() +
  labs(x = "GDP/capita (Rs)", y = "Difference in percentage points", fill = "") +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    # legend.text = element_text(size = 10),
    # legend.title = element_text(size = 8, face = "bold"),
    # legend.key.size = unit(0.45, "cm"),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(breaks = c(-20, -10, 0, 10, 20), limits = c(-33, 33)) +
  scale_x_continuous(
    breaks = c(20000, 40000, 60000),
    limits = c(0, 80000),
    labels = c("20000" = "20k", "40000" = "40k", "60000" = "60k")
  ) +
  coord_fixed(ratio = 1 / 0.0010, expand = FALSE)

print(fig)

# straight-line model of the plotted y variable on PCI_districts
mod <- lm((diab_f_coeff * 100) ~ PCI_districts, data = Ana2)
summary(mod)

# odds ratio: coefficient on the log-odds scale, y ticks labelled with the
# corresponding odds ratios
fig <- ggplot(Ana2, aes(x = PCI_districts, y = diab_f_coeff_log)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
  theme_classic() +
  labs(x = "GDP/capita (Rs)", y = "Odds Ratio", fill = "") +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    # legend.text = element_text(size = 10),
    # legend.title = element_text(size = 8, face = "bold"),
    # legend.key.size = unit(0.45, "cm"),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(
    breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
    limits = c(-5.3, 5.3),
    labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1",
               "1.609" = "5", "2.996" = "20")
  ) +
  scale_x_continuous(
    breaks = c(20000, 40000, 60000),
    limits = c(0, 80000),
    labels = c("20000" = "20k", "40000" = "40k", "60000" = "60k")
  ) +
  coord_fixed(ratio = 1 / 0.00015, expand = FALSE)

print(fig)

mod <- lm((diab_f_coeff_log) ~ PCI_districts, data = Ana2)
summary(mod)



## hypertension

# absolute differences: coefficient * 100 against PCI_districts, jittered
# points plus a fitted straight line
fig <- ggplot(Ana2, aes(x = PCI_districts, y = htn_f_coeff * 100)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
  theme_classic() +
  labs(x = "GDP/capita (Rs)", y = "Difference in percentage points", fill = "") +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    # legend.text = element_text(size = 10),
    # legend.title = element_text(size = 8, face = "bold"),
    # legend.key.size = unit(0.45, "cm"),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(breaks = c(-20, -10, 0, 10, 20), limits = c(-33, 33)) +
  scale_x_continuous(
    breaks = c(20000, 40000, 60000),
    limits = c(0, 80000),
    labels = c("20000" = "20k", "40000" = "40k", "60000" = "60k")
  ) +
  coord_fixed(ratio = 1 / 0.0010, expand = FALSE)

print(fig)

# straight-line model of the plotted y variable on PCI_districts
mod <- lm((htn_f_coeff * 100) ~ PCI_districts, data = Ana2)
summary(mod)

#ods ratio 

fig<- Ana2 %>% 
  ggplot()+ geom_jitter(mapping=aes(y=htn_f_coeff_log, x=PCI_districts),size=0.3)+
geom_smooth(mapping=aes(y=htn_f_coeff_log, x=PCI_districts),method="lm",color="gray48",se=FALSE,size=0.7)+theme_classic()+
labs(x = "GDP/capita (Rs)",y="Odds Ratio",
       fill="") +
  theme(axis.text=element_text(size=22),
        axis.title.x = element_text(size=20, face="bold",margin = margin(t = 20)),
        axis.title.y = element_text(size=20, face="bold",margin = margin(r = 20)),
        #legend.text=element_text(size=10),
        #legend.title = element_text(size=8,face="bold"),
        #legend.key.size = unit(0.45,"cm"),
        plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
        strip.background = element_blank())+
  scale_y_continuous(breaks = c((-2.996),(-1.609),0,1.609,2.996),limits=c((-5.3),5.3),labels=c( "-2.996"="0.05","-1.609"="0.2","0"= "1","1.609"="5","2.996"="20"))+
  scale_x_continuous(breaks = c(20000, 40000,60000),limits=c(0,80000),labels=c( "20000"="20k","40000"="40k","60000"="60k"))+coord_fixed(ratio=1/0.00015, expand=F)

print(fig)

mod<-lm((htn_f_coeff_log) ~PCI_districts,data=Ana2) 

summary(mod)


##bmi

#absolute difference

fig<- Ana2 %>% 
  ggplot()+ geom_jitter(mapping=aes(y=bmi_f_coeff*100, x=PCI_districts),size=0.3)+
geom_smooth(mapping=aes(y=bmi_f_coeff*100, x=PCI_districts),method="lm",color="gray48",se=FALSE,size=0.7)+theme_classic()+
labs(x = "GDP/capita (Rs)",y="Difference in percentage points",
       fill="") +
  theme(axis.text=element_text(size=22),
        axis.title.x = element_text(size=20, face="bold",margin = margin(t = 20)),
        axis.title.y = element_text(size=20, face="bold",margin = margin(r = 20)),
        #legend.text=element_text(size=10),
        #legend.title = element_text(size=8,face="bold"),
        #legend.key.size = unit(0.45,"cm"),
        plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
        strip.background = element_blank())+
  scale_y_continuous(breaks = c((-20),(-10),0,10,20),limits=c((-33),33))+
  scale_x_continuous(breaks = c(20000, 40000,60000),limits=c(0,80000),labels=c( "20000"="20k","40000"="40k","60000"="60k"))+coord_fixed(ratio=1/0.0010, expand=F)

print(fig)

mod<-lm((bmi_f_coeff*100) ~PCI_districts,data=Ana2) 

summary(mod)

#odds ratio

fig<- Ana2 %>% 
  ggplot()+ geom_jitter(mapping=aes(y=bmi_f_coeff_log, x=PCI_districts),size=0.3)+
geom_smooth(mapping=aes(y=bmi_f_coeff_log, x=PCI_districts),method="lm",color="gray48",se=FALSE,size=0.7)+theme_classic()+
labs(x = "GDP/capita (Rs)",y="Odds Ratio",
       fill="") +
  theme(axis.text=element_text(size=22),
        axis.title.x = element_text(size=20, face="bold",margin = margin(t = 20)),
        axis.title.y = element_text(size=20, face="bold",margin = margin(r = 20)),
        #legend.text=element_text(size=10),
        #legend.title = element_text(size=8,face="bold"),
        #legend.key.size = unit(0.45,"cm"),
        plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
        strip.background = element_blank())+
  scale_y_continuous(breaks = c((-2.996),(-1.609),0,1.609,2.996),limits=c((-5.3),5.3),labels=c( "-2.996"="0.05","-1.609"="0.2","0"= "1","1.609"="5","2.996"="20"))+
  scale_x_continuous(breaks = c(20000, 40000,60000),limits=c(0,80000),labels=c( "20000"="20k","40000"="40k","60000"="60k"))+coord_fixed(ratio=1/0.00015, expand=F)

print(fig)

mod<-lm((bmi_f_coeff_log) ~PCI_districts,data=Ana2) 
summary(mod)


##csmoke

#absolute difference

fig<- Ana2 %>% 
  ggplot()+ geom_jitter(mapping=aes(y=csmoke_f_coeff*100, x=PCI_districts),size=0.3)+
geom_smooth(mapping=aes(y=csmoke_f_coeff*100, x=PCI_districts),method="lm",color="gray48",se=FALSE,size=0.7)+theme_classic()+
labs(x = "GDP/capita (Rs)",y="Difference in percentage points",
       fill="") +
  theme(axis.text=element_text(size=22),
        axis.title.x = element_text(size=20, face="bold",margin = margin(t = 20)),
        axis.title.y = element_text(size=20, face="bold",margin = margin(r = 20)),
        #legend.text=element_text(size=10),
        #legend.title = element_text(size=8,face="bold"),
        #legend.key.size = unit(0.45,"cm"),
        plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
        strip.background = element_blank())+
  scale_y_continuous(breaks = c((-20),(-10),0,10,20),limits=c((-33),33))+
  scale_x_continuous(breaks = c(20000, 40000,60000),limits=c(0,80000),labels=c( "20000"="20k","40000"="40k","60000"="60k"))+coord_fixed(ratio=1/0.0010, expand=F)

print(fig)

mod<-lm((csmoke_f_coeff*100) ~PCI_districts,data=Ana2) 
summary(mod)
 
#odds ratio

fig<- Ana2 %>% 
  ggplot()+ geom_jitter(mapping=aes(y=csmoke_f_coeff_log, x=PCI_districts),size=0.3)+
geom_smooth(mapping=aes(y=csmoke_f_coeff_log, x=PCI_districts),method="lm",color="gray48",se=FALSE,size=0.7)+theme_classic()+
labs(x = "GDP/capita (Rs)",y="Odds Ratio",
       fill="") +
  theme(axis.text=element_text(size=22),
        axis.title.x = element_text(size=20, face="bold",margin = margin(t = 20)),
        axis.title.y = element_text(size=20, face="bold",margin = margin(r = 20)),
        #legend.text=element_text(size=10),
        #legend.title = element_text(size=8,face="bold"),
        #legend.key.size = unit(0.45,"cm"),
        plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
        strip.background = element_blank())+
  scale_y_continuous(breaks = c((-2.996),(-1.609),0,1.609,2.996),limits=c((-5.3),5.3),labels=c( "-2.996"="0.05","-1.609"="0.2","0"= "1","1.609"="5","2.996"="20"))+
  scale_x_continuous(breaks = c(20000, 40000,60000),limits=c(0,80000),labels=c( "20000"="20k","40000"="40k","60000"="60k"))+coord_fixed(ratio=1/0.00015, expand=F)

print(fig)

mod<-lm((csmoke_f_coeff_log) ~PCI_districts,data=Ana2) 

summary(mod)



```

```{r graphs:Female literacy rate }


# Female literacy rate gradients -----------------------------------------------
# All eight figures in this chunk share one layout: jittered points plus an OLS
# line, one outcome column of Ana2 on the y-axis and literacy_rate_female on the
# x-axis. Only the outcome column and the effect measure change, so the figure
# and its matching lm() come from one helper instead of hand-copied pipelines.
#
# outcome: column prefix in Ana2 ("diab", "htn", "bmi" or "csmoke").
# measure: "abs" uses <outcome>_f_coeff * 100 (difference in percentage points);
#          "or"  uses <outcome>_f_coeff_log, with y ticks placed at
#                log(0.05), log(0.2), 0, log(5), log(20) and labelled as ratios.
# Returns list(fig = <ggplot>, mod = <lm>).
#
# NOTE(review): scale limits drop points outside the range before geom_smooth()
# fits its line, whereas lm() uses every row; the drawn line and the printed
# model can differ if any district falls outside the limits -- confirm.
literacy_gradient <- function(outcome, measure = c("abs", "or")) {
  measure <- match.arg(measure)

  if (measure == "abs") {
    y_expr <- bquote(.(as.name(paste0(outcome, "_f_coeff"))) * 100)
    y_lab <- "Difference in percentage points"
    y_scale <- scale_y_continuous(
      breaks = c(-20, -10, 0, 10, 20),
      limits = c(-33, 33)
    )
    ratio <- 12 / 10
  } else {
    y_expr <- as.name(paste0(outcome, "_f_coeff_log"))
    y_lab <- "Odds Ratio"
    y_scale <- scale_y_continuous(
      breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
      limits = c(-5.3, 5.3),
      labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1",
                 "1.609" = "5", "2.996" = "20")
    )
    ratio <- 15 / 2
  }

  # Build the mapping from the constructed expression so the plot uses exactly
  # the same y expression as the model below.
  mapping <- eval(bquote(aes(x = literacy_rate_female, y = .(y_expr))))

  fig <- ggplot(Ana2, mapping) +
    geom_jitter(size = 0.3) +
    geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
    theme_classic() +
    labs(x = "Female literacy rate", y = y_lab, fill = "") +
    theme(
      axis.text = element_text(size = 22),
      axis.title.x = element_text(size = 20, face = "bold",
                                  margin = margin(t = 20)),
      axis.title.y = element_text(size = 20, face = "bold",
                                  margin = margin(r = 20)),
      plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
      strip.background = element_blank()
    ) +
    y_scale +
    scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(15, 110)) +
    coord_fixed(ratio = ratio, expand = FALSE)

  # bquote() keeps the Call shown by summary() identical to a hand-written
  # lm((<y>) ~ literacy_rate_female, data = Ana2).
  mod <- eval(bquote(lm((.(y_expr)) ~ literacy_rate_female, data = Ana2)))

  list(fig = fig, mod = mod)
}

## Diabetes, hypertension, bmi, csmoke: absolute difference, then odds ratio
for (grad_outcome in c("diab", "htn", "bmi", "csmoke")) {
  for (grad_measure in c("abs", "or")) {
    grad_res <- literacy_gradient(grad_outcome, grad_measure)

    fig <- grad_res$fig
    print(fig)

    mod <- grad_res$mod
    print(summary(mod)) # explicit print: no auto-printing inside a loop
  }
}


```



```{r graphs: Median household wealth }


# Median household wealth gradients, split by urban / rural --------------------
# Rows with urban == 1 and urban == 0 are modelled separately; the figures use
# the full Ana2_urban_rural data and are faceted on urban_lab.
urban <- dplyr::filter(Ana2_urban_rural, urban == 1)
rural <- dplyr::filter(Ana2_urban_rural, urban == 0)

# Facet strip titles, keyed by urban_lab value.
# NOTE(review): assumes urban_lab takes the values "urban" / "rural" -- confirm.
labels <- c(urban = "Urban", rural = "Rural")

# All eight figures in this chunk share one layout: jittered points plus an OLS
# line per facet, one outcome column on the y-axis and medianai_r_u on the
# x-axis. Only the outcome column and the effect measure change, so the figure
# and its two lm() fits come from one helper instead of hand-copied pipelines.
#
# outcome: column prefix ("diab", "htn", "bmi" or "csmoke").
# measure: "abs" uses <outcome>_f_coeff * 100 (difference in percentage points);
#          "or"  uses <outcome>_f_coeff_log, with y ticks placed at
#                log(0.05), log(0.2), 0, log(5), log(20) and labelled as ratios.
# Returns list(fig = <ggplot>, mod_u = <lm on urban>, mod_r = <lm on rural>).
#
# NOTE(review): scale limits drop points outside the range before geom_smooth()
# fits its lines, whereas lm() uses every row; the drawn lines and the printed
# models can differ if any point falls outside the limits -- confirm.
wealth_gradient <- function(outcome, measure = c("abs", "or")) {
  measure <- match.arg(measure)

  if (measure == "abs") {
    y_expr <- bquote(.(as.name(paste0(outcome, "_f_coeff"))) * 100)
    # NOTE(review): the x-axis title is capitalised differently for the two
    # measures; both spellings are kept exactly as they were.
    x_lab <- "Median Household wealth"
    y_lab <- "Difference in percentage points"
    y_scale <- scale_y_continuous(
      breaks = c(-20, -10, 0, 10, 20),
      limits = c(-33, 33)
    )
    ratio <- 1 / 8
  } else {
    y_expr <- as.name(paste0(outcome, "_f_coeff_log"))
    x_lab <- "Median household wealth"
    y_lab <- "Odds Ratio"
    y_scale <- scale_y_continuous(
      breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
      limits = c(-5.3, 5.3),
      labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1",
                 "1.609" = "5", "2.996" = "20")
    )
    ratio <- 10 / 12
  }

  # Build the mapping from the constructed expression so the plot uses exactly
  # the same y expression as the models below.
  mapping <- eval(bquote(aes(x = medianai_r_u, y = .(y_expr))))

  fig <- ggplot(Ana2_urban_rural, mapping) +
    geom_jitter(size = 0.3) +
    geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
    theme_classic() +
    labs(x = x_lab, y = y_lab, fill = "") +
    facet_wrap(~urban_lab, labeller = labeller(urban_lab = labels)) +
    theme(
      axis.text = element_text(size = 22),
      axis.title.x = element_text(size = 20, face = "bold",
                                  margin = margin(t = 20)),
      axis.title.y = element_text(size = 20, face = "bold",
                                  margin = margin(r = 20)),
      panel.spacing = unit(2, "lines"),
      plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
      strip.text.x = element_text(size = 20, face = "bold"),
      strip.text.y = element_text(size = 20, face = "bold"),
      strip.background = element_blank()
    ) +
    y_scale +
    scale_x_continuous(breaks = c(-1.5, 0, 1.5), limits = c(-2.5, 2.5)) +
    coord_fixed(ratio = ratio, expand = FALSE)

  # bquote() keeps the Call shown by summary() identical to a hand-written
  # lm((<y>) ~ medianai_r_u, data = urban) / data = rural.
  mod_u <- eval(bquote(lm((.(y_expr)) ~ medianai_r_u, data = urban)))
  mod_r <- eval(bquote(lm((.(y_expr)) ~ medianai_r_u, data = rural)))

  list(fig = fig, mod_u = mod_u, mod_r = mod_r)
}

## Diabetes, hypertension, bmi, csmoke: absolute difference, then odds ratio.
## The rural fit always goes to mod_r; previously the csmoke absolute-difference
## rural model was stored in mod_u, overwriting the urban fit.
for (grad_outcome in c("diab", "htn", "bmi", "csmoke")) {
  for (grad_measure in c("abs", "or")) {
    grad_res <- wealth_gradient(grad_outcome, grad_measure)

    fig <- grad_res$fig
    print(fig)

    # explicit print: no auto-printing inside a loop
    mod_u <- grad_res$mod_u
    print(summary(mod_u))

    mod_r <- grad_res$mod_r
    print(summary(mod_r))
  }
}


```








```{r graphs: urban prop}


##Diabetes

#absolute difference

fig<- Ana2 %>% 
  ggplot()+ geom_jitter(mapping=aes(y=diab_f_coeff*100, x=urban_prop),size=0.3)+
geom_smooth(mapping=aes(y=diab_f_coeff*100, x=urban_prop),method="lm",color="gray48",se=FALSE,size=0.7)+theme_classic()+
labs(x = "% of participants who live in an urban area",y="Difference in percentage points",
       fill="") +
  theme(axis.text=element_text(size=22),
        axis.title.x = element_text(size=20, face="bold",margin = margin(t = 20)),
        axis.title.y = element_text(size=20, face="bold",margin = margin(r = 20)),
        #legend.text=element_text(size=10),
        #legend.title = element_text(size=8,face="bold"),
        #legend.key.size = unit(0.45,"cm"),
        plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
        strip.background = element_blank())+
  scale_y_continuous(breaks = c((-20),(-10),0,10,20),limits=c((-33),33))+
   scale_x_continuous(breaks = c(25, 50, 75, 100),limits=c(0,125))+coord_fixed(ratio=12/7.5,expand=F)
print(fig)

mod<-lm((diab_f_coeff*100) ~urban_prop,data=Ana2) 
summary(mod)

#odds ratio

fig<- Ana2 %>% 
  ggplot()+ geom_jitter(mapping=aes(y=diab_f_coeff_log, x=urban_prop),size=0.3)+
geom_smooth(mapping=aes(y=diab_f_coeff_log, x=urban_prop),method="lm",color="gray48",se=FALSE,size=0.7)+theme_classic()+
labs(x = "% of participants who live in an urban area",y="Odds Ratio",
       fill="") +
  theme(axis.text=element_text(size=22),
        axis.title.x = element_text(size=20, face="bold",margin = margin(t = 20)),
        axis.title.y = element_text(size=20, face="bold",margin = margin(r = 20)),
        #legend.text=element_text(size=10),
        #legend.title = element_text(size=8,face="bold"),
        #legend.key.size = unit(0.45,"cm"),
        plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
        strip.background = element_blank())+
  scale_y_continuous(breaks = c((-2.996),(-1.609),0,1.609,2.996),limits=c((-5.3),5.3),labels=c( "-2.996"="0.05","-1.609"="0.2","0"= "1","1.609"="5","2.996"="20"))+
scale_x_continuous(breaks = c(25, 50, 75, 100),limits=c(0,125))+coord_fixed(ratio=12/1.14, expand=F)
print(fig)

mod<-lm((diab_f_coeff_log) ~urban_prop,data=Ana2) 
summary(mod)




##Hypertension

#absolute difference 

fig<- Ana2 %>% 
  ggplot()+ geom_jitter(mapping=aes(y=htn_f_coeff*100, x=urban_prop),size=0.3)+
geom_smooth(mapping=aes(y=htn_f_coeff*100, x=urban_prop),method="lm",color="gray48",se=FALSE,size=0.7)+theme_classic()+
labs(x = "% of participants who live in an urban area",y="Difference in percentage points",
       fill="") +
  theme(axis.text=element_text(size=22),
        axis.title.x = element_text(size=20, face="bold",margin = margin(t = 20)),
        axis.title.y = element_text(size=20, face="bold",margin = margin(r = 20)),
        #legend.text=element_text(size=10),
        #legend.title = element_text(size=8,face="bold"),
        #legend.key.size = unit(0.45,"cm"),
        plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
        strip.background = element_blank())+
  scale_y_continuous(breaks = c((-20),(-10),0,10,20),limits=c((-33),33))+
   scale_x_continuous(breaks = c(25, 50, 75, 100),limits=c(0,125))+coord_fixed(ratio=12/7.5,expand=F)
print(fig)

mod<-lm((htn_f_coeff*100) ~urban_prop,data=Ana2) 
summary(mod)

#odds ratio

fig<- Ana2 %>% 
  ggplot()+ geom_jitter(mapping=aes(y=htn_f_coeff_log, x=urban_prop),size=0.3)+
geom_smooth(mapping=aes(y=htn_f_coeff_log, x=urban_prop),method="lm",color="gray48",se=FALSE,size=0.7)+theme_classic()+
labs(x = "% of participants who live in an urban area",y="Odds Ratio",
       fill="") +
  theme(axis.text=element_text(size=22),
        axis.title.x = element_text(size=20, face="bold",margin = margin(t = 20)),
        axis.title.y = element_text(size=20, face="bold",margin = margin(r = 20)),
        #legend.text=element_text(size=10),
        #legend.title = element_text(size=8,face="bold"),
        #legend.key.size = unit(0.45,"cm"),
        plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
        strip.background = element_blank())+
  scale_y_continuous(breaks = c((-2.996),(-1.609),0,1.609,2.996),limits=c((-5.3),5.3),labels=c( "-2.996"="0.05","-1.609"="0.2","0"= "1","1.609"="5","2.996"="20"))+
scale_x_continuous(breaks = c(25, 50, 75, 100),limits=c(0,125))+coord_fixed(ratio=12/1.14, expand=F)
print(fig)

mod<-lm((htn_f_coeff_log) ~urban_prop,data=Ana2) 
summary(mod)




##bmi

#absolute difference

fig<- Ana2 %>% 
  ggplot()+ geom_jitter(mapping=aes(y=bmi_f_coeff*100, x=urban_prop),size=0.3)+
geom_smooth(mapping=aes(y=bmi_f_coeff*100, x=urban_prop),method="lm",color="gray48",se=FALSE,size=0.7)+theme_classic()+
labs(x = "% of participants who live in an urban area",y="Difference in percentage points",
       fill="") +
  theme(axis.text=element_text(size=22),
        axis.title.x = element_text(size=20, face="bold",margin = margin(t = 20)),
        axis.title.y = element_text(size=20, face="bold",margin = margin(r = 20)),
        #legend.text=element_text(size=10),
        #legend.title = element_text(size=8,face="bold"),
        #legend.key.size = unit(0.45,"cm"),
        plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
        strip.background = element_blank())+
  scale_y_continuous(breaks = c((-20),(-10),0,10,20),limits=c((-33),33))+
   scale_x_continuous(breaks = c(25, 50, 75, 100),limits=c(0,125))+coord_fixed(ratio=12/7.5,expand=F)
print(fig)

mod<-lm((bmi_f_coeff*100) ~urban_prop,data=Ana2) 
summary(mod)
 
#odds ratio

# Jittered scatter of bmi_f_coeff_log against urban_prop with a least-squares
# trend line. The y ticks sit at roughly log(0.05), log(0.2), log(1), log(5)
# and log(20) and are labelled with the corresponding odds ratios.
# Note: scale limits drop out-of-range rows before geom_smooth() fits, whereas
# `mod` below is estimated without those limits.
fig <- ggplot(Ana2, aes(x = urban_prop, y = bmi_f_coeff_log)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "gray48", size = 0.7) +
  theme_classic() +
  labs(
    x = "% of participants who live in an urban area",
    y = "Odds Ratio",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(
    breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
    limits = c(-5.3, 5.3),
    labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1", "1.609" = "5", "2.996" = "20")
  ) +
  scale_x_continuous(breaks = seq(25, 100, by = 25), limits = c(0, 125)) +
  coord_fixed(ratio = 12 / 1.14, expand = FALSE)
print(fig)

# Same relationship as a plain linear model; the summary is printed.
mod <- lm((bmi_f_coeff_log) ~ urban_prop, data = Ana2)
summary(mod)




##csmoke

#absolute difference

# Jittered scatter of csmoke_f_coeff (x100) against urban_prop with a straight
# least-squares trend line; the y axis is labelled in percentage points.
# Note: scale limits drop out-of-range rows before geom_smooth() fits, whereas
# `mod` below is estimated without those limits.
fig <- ggplot(Ana2, aes(x = urban_prop, y = csmoke_f_coeff * 100)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "gray48", size = 0.7) +
  theme_classic() +
  labs(
    x = "% of participants who live in an urban area",
    y = "Difference in percentage points",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(breaks = seq(-20, 20, by = 10), limits = c(-33, 33)) +
  scale_x_continuous(breaks = seq(25, 100, by = 25), limits = c(0, 125)) +
  coord_fixed(ratio = 12 / 7.5, expand = FALSE)
print(fig)

# Same relationship as a plain linear model; the summary is printed.
mod <- lm((csmoke_f_coeff * 100) ~ urban_prop, data = Ana2)
summary(mod)
 
#odds ratio

# Jittered scatter of csmoke_f_coeff_log against urban_prop with a
# least-squares trend line. The y ticks sit at roughly log(0.05), log(0.2),
# log(1), log(5) and log(20) and are labelled with the matching odds ratios.
# Note: scale limits drop out-of-range rows before geom_smooth() fits, whereas
# `mod` below is estimated without those limits.
fig <- ggplot(Ana2, aes(x = urban_prop, y = csmoke_f_coeff_log)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "gray48", size = 0.7) +
  theme_classic() +
  labs(
    x = "% of participants who live in an urban area",
    y = "Odds Ratio",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(
    breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
    limits = c(-5.3, 5.3),
    labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1", "1.609" = "5", "2.996" = "20")
  ) +
  scale_x_continuous(breaks = seq(25, 100, by = 25), limits = c(0, 125)) +
  coord_fixed(ratio = 12 / 1.14, expand = FALSE)
print(fig)

# Same relationship as a plain linear model; the summary is printed.
mod <- lm((csmoke_f_coeff_log) ~ urban_prop, data = Ana2)
summary(mod)

```








# District-level regressions using wealth quintiles computed for each district: bottom two vs. top two quintiles

##### Linear and logistic regressions
```{r filter districts with no contrasts}


# Start from the pooled DLHS/AHS data, keep only the columns used in the
# district-level analysis, and retain rows whose district has at least 5 in
# both urban_prop and rural_prop.
Ana1 <- India_DLHS_AHS %>%
  dplyr::select(
    ex_d_name_ind,
    ex_diab_narrow_ind, ex_htn_narrow_ind, bmigrt27.5, csmoke,
    ex_diab_narrow_ind_dbl, ex_htn_narrow_ind_dbl, bmigrt27.5_dbl, csmoke_dbl,
    sex, age_grp, urban, urban_lab,
    hh_wealth_quintile_groups_district,
    urban_prop, rural_prop,
    PCI_districts, literacy_rate_female, ed_att_new, medianai_r_u
  ) %>%
  dplyr::filter(urban_prop >= 5, rural_prop >= 5)


 
#drop districts with <50 cases in low or high SES category


##test
test <- Ana1

# Rows per district; the printed tibble has one row per district
# (recorded result: 517 districts).
test %>%
  group_by(ex_d_name_ind) %>%
  summarise(sum = n())

# Rows per district x wealth group, keeping only the cells with fewer than
# 50 rows.
test <- Ana1 %>%
  group_by(ex_d_name_ind, hh_wealth_quintile_groups_district) %>%
  summarise(sum = n())
test <- test %>% dplyr::filter(sum < 50)

# Of those sparse cells, how many belong to wealth group 0 or 2, per district
# (recorded result: 0 districts).
test %>%
  dplyr::filter(
    hh_wealth_quintile_groups_district == 0 | hh_wealth_quintile_groups_district == 2
  ) %>%
  summarise(sum = n())



# Flag membership in wealth-quintile group 0 and group 2 as 1/0 indicators.
# The comparison already yields NA for rows with a missing group, so no
# explicit is.na() branch (and no reassignable `T`) is needed.
Ana1 <- mutate(
  Ana1,
  wq_0_o = as.numeric(hh_wealth_quintile_groups_district == 0),
  wq_2_o = as.numeric(hh_wealth_quintile_groups_district == 2)
)

# Count the members of each of the two groups per district and keep only
# districts with at least 50 rows in both groups.
Ana1 <- Ana1 %>%
  group_by(ex_d_name_ind) %>%
  mutate(
    count_wq_0_o = sum(wq_0_o, na.rm = TRUE),
    count_wq_2_o = sum(wq_2_o, na.rm = TRUE)
  ) %>%
  dplyr::filter(count_wq_0_o >= 50 & count_wq_2_o >= 50) %>%
  ungroup()

# Number of districts that survive the filter, as a single value
# (recorded result: 517).
Ana1 %>% summarise(n_districts = n_distinct(ex_d_name_ind))



#drop districts with too few cases in lower & higher SES category/CVD risk factor/district


##test if code is working
test <- Ana1

# Rows per district (recorded result: 517 districts).
test %>%
  group_by(ex_d_name_ind) %>%
  summarise(sum = n())

# Number of cases of each risk factor per district x wealth group.
test <- test %>%
  group_by(ex_d_name_ind, hh_wealth_quintile_groups_district) %>%
  summarise(
    diabetes_cases = sum(ex_diab_narrow_ind_dbl, na.rm = TRUE),
    hypertension_cases = sum(ex_htn_narrow_ind_dbl, na.rm = TRUE),
    obesity_cases = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases = sum(csmoke_dbl, na.rm = TRUE)
  )

# Diabetes: cells with fewer than 10 cases, listed separately for group 2
# (recorded: 35 districts) and group 0 (recorded: 62 districts).
test_d <- test %>% dplyr::filter(diabetes_cases < 10)
test_d %>%
  dplyr::filter(hh_wealth_quintile_groups_district == 2) %>%
  summarise(sum = n())
test_d %>%
  dplyr::filter(hh_wealth_quintile_groups_district == 0) %>%
  summarise(sum = n())

# Hypertension: recorded 0 districts for group 2 and 0 for group 0.
test_h <- test %>% dplyr::filter(hypertension_cases < 10)
test_h %>%
  dplyr::filter(hh_wealth_quintile_groups_district == 2) %>%
  summarise(sum = n())
test_h %>%
  dplyr::filter(hh_wealth_quintile_groups_district == 0) %>%
  summarise(sum = n())

# Obesity: recorded 48 districts for group 2 and 76 for group 0.
test_o <- test %>% dplyr::filter(obesity_cases < 10)
test_o %>%
  dplyr::filter(hh_wealth_quintile_groups_district == 2) %>%
  summarise(sum = n())
test_o %>%
  dplyr::filter(hh_wealth_quintile_groups_district == 0) %>%
  summarise(sum = n())

# Current smoking: recorded 6 districts for group 2 and 4 for group 0.
test_c <- test %>% dplyr::filter(csmoke_cases < 10)
test_c %>%
  dplyr::filter(hh_wealth_quintile_groups_district == 2) %>%
  summarise(sum = n())
test_c %>%
  dplyr::filter(hh_wealth_quintile_groups_district == 0) %>%
  summarise(sum = n())






##count district cases of diabetes/hypertension/obesity/csmoke in higher SES category
# Cases of each risk factor per district among rows in wealth group 2
# (the higher SES category).
case_list <- Ana1 %>%
  dplyr::filter(hh_wealth_quintile_groups_district == 2) %>%
  group_by(ex_d_name_ind) %>%
  summarise(
    diabetes_cases_2 = sum(ex_diab_narrow_ind_dbl, na.rm = TRUE),
    hypertension_cases_2 = sum(ex_htn_narrow_ind_dbl, na.rm = TRUE),
    obesity_cases_2 = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases_2 = sum(csmoke_dbl, na.rm = TRUE)
  )

# Attach each count to every row of its district, building one analysis
# data set per risk factor.

# diabetes
diabetes_list <- case_list %>% dplyr::select(ex_d_name_ind, diabetes_cases_2)
diabetes_analysis <- Ana1 %>% left_join(diabetes_list, by = "ex_d_name_ind")

# hypertension
hypertension_list <- case_list %>% dplyr::select(ex_d_name_ind, hypertension_cases_2)
hypertension_analysis <- Ana1 %>% left_join(hypertension_list, by = "ex_d_name_ind")

# obesity
obesity_list <- case_list %>% dplyr::select(ex_d_name_ind, obesity_cases_2)
obesity_analysis <- Ana1 %>% left_join(obesity_list, by = "ex_d_name_ind")

# csmoke
csmoke_list <- case_list %>% dplyr::select(ex_d_name_ind, csmoke_cases_2)
csmoke_analysis <- Ana1 %>% left_join(csmoke_list, by = "ex_d_name_ind")

# Cross-check against the test counts: districts with fewer than 10 cases in
# group 2 (one output row per such district).
# diabetes: 35 districts
diabetes_analysis %>%
  dplyr::filter(diabetes_cases_2 < 10) %>%
  group_by(ex_d_name_ind) %>%
  summarise(sum = n())
# hypertension: 0 districts
hypertension_analysis %>%
  dplyr::filter(hypertension_cases_2 < 10) %>%
  group_by(ex_d_name_ind) %>%
  summarise(sum = n())
# obesity: 48 districts
obesity_analysis %>%
  dplyr::filter(obesity_cases_2 < 10) %>%
  group_by(ex_d_name_ind) %>%
  summarise(sum = n())
# csmoke: 6 districts
csmoke_analysis %>%
  dplyr::filter(csmoke_cases_2 < 10) %>%
  group_by(ex_d_name_ind) %>%
  summarise(sum = n())


##count district cases of diabetes/hypertension/obesity/csmoke in lower SES category
# Cases of each risk factor per district among rows in wealth group 0
# (the lower SES category).
case_list <- Ana1 %>%
  dplyr::filter(hh_wealth_quintile_groups_district == 0) %>%
  group_by(ex_d_name_ind) %>%
  summarise(
    diabetes_cases_0 = sum(ex_diab_narrow_ind_dbl, na.rm = TRUE),
    hypertension_cases_0 = sum(ex_htn_narrow_ind_dbl, na.rm = TRUE),
    obesity_cases_0 = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases_0 = sum(csmoke_dbl, na.rm = TRUE)
  )

# Add the group-0 counts to the per-risk-factor analysis data sets, which
# already carry the group-2 counts.

# diabetes
diabetes_list <- case_list %>% dplyr::select(ex_d_name_ind, diabetes_cases_0)
diabetes_analysis <- diabetes_analysis %>% left_join(diabetes_list, by = "ex_d_name_ind")

# hypertension
hypertension_list <- case_list %>% dplyr::select(ex_d_name_ind, hypertension_cases_0)
hypertension_analysis <- hypertension_analysis %>% left_join(hypertension_list, by = "ex_d_name_ind")

# obesity
obesity_list <- case_list %>% dplyr::select(ex_d_name_ind, obesity_cases_0)
obesity_analysis <- obesity_analysis %>% left_join(obesity_list, by = "ex_d_name_ind")

# csmoke
csmoke_list <- case_list %>% dplyr::select(ex_d_name_ind, csmoke_cases_0)
csmoke_analysis <- csmoke_analysis %>% left_join(csmoke_list, by = "ex_d_name_ind")

# Cross-check against the test counts: districts with fewer than 10 cases in
# group 0 (one output row per such district).
# diabetes: 62 districts
diabetes_analysis %>%
  dplyr::filter(diabetes_cases_0 < 10) %>%
  group_by(ex_d_name_ind) %>%
  summarise(sum = n())
# hypertension: 0 districts
hypertension_analysis %>%
  dplyr::filter(hypertension_cases_0 < 10) %>%
  group_by(ex_d_name_ind) %>%
  summarise(sum = n())
# obesity: 76 districts
obesity_analysis %>%
  dplyr::filter(obesity_cases_0 < 10) %>%
  group_by(ex_d_name_ind) %>%
  summarise(sum = n())
# csmoke: 4 districts
csmoke_analysis %>%
  dplyr::filter(csmoke_cases_0 < 10) %>%
  group_by(ex_d_name_ind) %>%
  summarise(sum = n())


#calculate sum of cases in higher and lower SES category for each CVD risk factor & exclude districts with <20 sum of cases (in higher and lower SES category)/district for the individual CVD risk factor

##diabetes
# For every risk factor: add the group-0 and group-2 case counts of the
# district and keep only districts whose combined count is at least 20.

# diabetes
diabetes_analysis <- diabetes_analysis %>%
  mutate(sum_cases = diabetes_cases_0 + diabetes_cases_2) %>%
  dplyr::filter(sum_cases >= 20)

# hypertension
hypertension_analysis <- hypertension_analysis %>%
  mutate(sum_cases = hypertension_cases_0 + hypertension_cases_2) %>%
  dplyr::filter(sum_cases >= 20)

# obesity
obesity_analysis <- obesity_analysis %>%
  mutate(sum_cases = obesity_cases_0 + obesity_cases_2) %>%
  dplyr::filter(sum_cases >= 20)

# csmoke
csmoke_analysis <- csmoke_analysis %>%
  mutate(sum_cases = csmoke_cases_0 + csmoke_cases_2) %>%
  dplyr::filter(sum_cases >= 20)



## check number of districts that have not been dropped
# Each call prints one row per remaining district, so the row count of the
# printed tibble is the number of districts kept for that risk factor.

# diabetes: 477 -> 517-477=40 rows removed from graphs
# 517-116=401 districts
diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::slice(1) %>%
  summarise(n = n())

# hypertension: 517 -> 517-517=0 rows removed from graphs
# pci: 517-80=437 districts
hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::slice(1) %>%
  summarise(n = n())

# obesity: 461 -> 517-461=56 rows removed from graphs
# pci: 517-128=389 districts
obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::slice(1) %>%
  summarise(n = n())

# csmoke: 514 -> 517-514=3 rows removed from graphs
# pci: 517-83=434 districts
csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::slice(1) %>%
  summarise(n = n())






```


```{r Regression analysis district level glm}


#####diabetes

#logistic regression grouped by district

# Per-district logistic regression (arm::bayesglm) of ex_diab_narrow_ind on
# age group, urban residence, sex and the district wealth-quintile group.
# `analysis` holds one row per district with the fitted model in the
# list-column `mod`.
analysis <- diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    ex_diab_narrow_ind ~ age_grp + urban + sex + hh_wealth_quintile_groups_district,
    family = binomial(link = "logit"),
    data = .
  ))

# Coefficient table of every district model. broom deprecated and later
# removed its rowwise-data-frame tidiers (`tidy(<rowwise_df>, mod)`), so each
# stored model is tidied explicitly; the result has the same shape (grouped by
# district, one row per model term) under old and new broom.
coeffs_diab_f <- analysis %>%
  group_by(ex_d_name_ind) %>%
  do(broom::tidy(.$mod[[1]]))

# Keep the coefficient of wealth-quintile group 2 (presumably relative to
# reference group 0 -- confirm the factor's level order), rename it, and
# attach it to every row of the matching district in Ana1.
coeffs_w5 <- dplyr::filter(coeffs_diab_f, term == "hh_wealth_quintile_groups_district2")
coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)
coeffs_w5_only <- rename(coeffs_w5_only, diab_f_coeff_log = estimate)
Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")



#####hypertension

#logistic regression grouped by district
# Per-district logistic regression (arm::bayesglm) of ex_htn_narrow_ind on age
# group, urban residence, sex and the district wealth-quintile group.
# `analysis` holds one row per district with the fitted model in the
# list-column `mod`.
analysis <- hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    ex_htn_narrow_ind ~ age_grp + urban + sex + hh_wealth_quintile_groups_district,
    family = binomial(link = "logit"),
    data = .
  ))

# Coefficient table of every district model. broom deprecated and later
# removed its rowwise-data-frame tidiers (`tidy(<rowwise_df>, mod)`), so each
# stored model is tidied explicitly; the result has the same shape (grouped by
# district, one row per model term) under old and new broom.
coeffs_htn_f <- analysis %>%
  group_by(ex_d_name_ind) %>%
  do(broom::tidy(.$mod[[1]]))

# Keep the coefficient of wealth-quintile group 2 (presumably relative to
# reference group 0 -- confirm the factor's level order), rename it, and
# attach it to every row of the matching district in Ana1.
coeffs_w5 <- dplyr::filter(coeffs_htn_f, term == "hh_wealth_quintile_groups_district2")
coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)
coeffs_w5_only <- rename(coeffs_w5_only, htn_f_coeff_log = estimate)
Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")



#####obesity

#logistic regression grouped by district
# Per-district logistic regression (arm::bayesglm) of bmigrt27.5 on age group,
# urban residence, sex and the district wealth-quintile group. `analysis`
# holds one row per district with the fitted model in the list-column `mod`.
analysis <- obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    bmigrt27.5 ~ age_grp + urban + sex + hh_wealth_quintile_groups_district,
    family = binomial(link = "logit"),
    data = .
  ))

# Coefficient table of every district model. broom deprecated and later
# removed its rowwise-data-frame tidiers (`tidy(<rowwise_df>, mod)`), so each
# stored model is tidied explicitly; the result has the same shape (grouped by
# district, one row per model term) under old and new broom.
coeffs_bmi_f <- analysis %>%
  group_by(ex_d_name_ind) %>%
  do(broom::tidy(.$mod[[1]]))

# Keep the coefficient of wealth-quintile group 2 (presumably relative to
# reference group 0 -- confirm the factor's level order), rename it, and
# attach it to every row of the matching district in Ana1.
coeffs_w5 <- dplyr::filter(coeffs_bmi_f, term == "hh_wealth_quintile_groups_district2")
coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)
coeffs_w5_only <- rename(coeffs_w5_only, bmi_f_coeff_log = estimate)
Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


#####csmoke

#logistic regression grouped by district
# Per-district logistic regression (arm::bayesglm) of csmoke on age group,
# urban residence, sex and the district wealth-quintile group. `analysis`
# holds one row per district with the fitted model in the list-column `mod`.
analysis <- csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    csmoke ~ age_grp + urban + sex + hh_wealth_quintile_groups_district,
    family = binomial(link = "logit"),
    data = .
  ))

# Coefficient table of every district model. broom deprecated and later
# removed its rowwise-data-frame tidiers (`tidy(<rowwise_df>, mod)`), so each
# stored model is tidied explicitly; the result has the same shape (grouped by
# district, one row per model term) under old and new broom.
coeffs_csmoke_f <- analysis %>%
  group_by(ex_d_name_ind) %>%
  do(broom::tidy(.$mod[[1]]))

# Keep the coefficient of wealth-quintile group 2 (presumably relative to
# reference group 0 -- confirm the factor's level order), rename it, and
# attach it to every row of the matching district in Ana1.
coeffs_w5 <- dplyr::filter(coeffs_csmoke_f, term == "hh_wealth_quintile_groups_district2")
coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)
coeffs_w5_only <- rename(coeffs_w5_only, csmoke_f_coeff_log = estimate)
Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


```

```{r Regression analysis district level lm}


#####diabetes

#linear regression grouped by district

# Per-district linear regression of the numeric diabetes indicator
# (ex_diab_narrow_ind_dbl) on age group, sex, urban residence and the district
# wealth-quintile group. `analysis` holds one row per district with the fitted
# model in the list-column `mod`.
analysis <- diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    ex_diab_narrow_ind_dbl ~ age_grp + sex + urban + hh_wealth_quintile_groups_district,
    data = .
  ))

# Coefficient table of every district model. broom deprecated and later
# removed its rowwise-data-frame tidiers (`tidy(<rowwise_df>, mod)`), so each
# stored model is tidied explicitly; the result has the same shape (grouped by
# district, one row per model term) under old and new broom.
coeffs_diab_f <- analysis %>%
  group_by(ex_d_name_ind) %>%
  do(broom::tidy(.$mod[[1]]))

# Keep the coefficient of wealth-quintile group 2 (presumably relative to
# reference group 0 -- confirm the factor's level order), rename it, and
# attach it to every row of the matching district in Ana1.
coeffs_w5 <- dplyr::filter(coeffs_diab_f, term == "hh_wealth_quintile_groups_district2")
coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)
coeffs_w5_only <- rename(coeffs_w5_only, diab_f_coeff = estimate)
Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")




#####hypertension

#linear regression grouped by district
# Per-district linear regression of the numeric hypertension indicator
# (ex_htn_narrow_ind_dbl) on age group, urban residence, sex and the district
# wealth-quintile group. `analysis` holds one row per district with the fitted
# model in the list-column `mod`.
analysis <- hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    ex_htn_narrow_ind_dbl ~ age_grp + urban + sex + hh_wealth_quintile_groups_district,
    data = .
  ))

# Coefficient table of every district model. broom deprecated and later
# removed its rowwise-data-frame tidiers (`tidy(<rowwise_df>, mod)`), so each
# stored model is tidied explicitly; the result has the same shape (grouped by
# district, one row per model term) under old and new broom.
coeffs_htn_f <- analysis %>%
  group_by(ex_d_name_ind) %>%
  do(broom::tidy(.$mod[[1]]))

# Keep the coefficient of wealth-quintile group 2 (presumably relative to
# reference group 0 -- confirm the factor's level order), rename it, and
# attach it to every row of the matching district in Ana1.
coeffs_w5 <- dplyr::filter(coeffs_htn_f, term == "hh_wealth_quintile_groups_district2")
coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)
coeffs_w5_only <- rename(coeffs_w5_only, htn_f_coeff = estimate)
Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")


######obesity

#linear regression grouped by district
# Per-district linear regression of the numeric obesity indicator
# (bmigrt27.5_dbl) on age group, urban residence, sex and the district
# wealth-quintile group. `analysis` holds one row per district with the fitted
# model in the list-column `mod`.
analysis <- obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    bmigrt27.5_dbl ~ age_grp + urban + sex + hh_wealth_quintile_groups_district,
    data = .
  ))

# Coefficient table of every district model. broom deprecated and later
# removed its rowwise-data-frame tidiers (`tidy(<rowwise_df>, mod)`), so each
# stored model is tidied explicitly; the result has the same shape (grouped by
# district, one row per model term) under old and new broom.
coeffs_bmi_f <- analysis %>%
  group_by(ex_d_name_ind) %>%
  do(broom::tidy(.$mod[[1]]))

# Keep the coefficient of wealth-quintile group 2 (presumably relative to
# reference group 0 -- confirm the factor's level order), rename it, and
# attach it to every row of the matching district in Ana1.
coeffs_w5 <- dplyr::filter(coeffs_bmi_f, term == "hh_wealth_quintile_groups_district2")
coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)
coeffs_w5_only <- rename(coeffs_w5_only, bmi_f_coeff = estimate)
Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")



######csmoke

#linear regression grouped by district
# Per-district linear regression of the numeric smoking indicator (csmoke_dbl)
# on age group, urban residence, sex and the district wealth-quintile group.
# `analysis` holds one row per district with the fitted model in the
# list-column `mod`.
analysis <- csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    csmoke_dbl ~ age_grp + urban + sex + hh_wealth_quintile_groups_district,
    data = .
  ))

# Coefficient table of every district model. broom deprecated and later
# removed its rowwise-data-frame tidiers (`tidy(<rowwise_df>, mod)`), so each
# stored model is tidied explicitly; the result has the same shape (grouped by
# district, one row per model term) under old and new broom.
coeffs_csmoke_f <- analysis %>%
  group_by(ex_d_name_ind) %>%
  do(broom::tidy(.$mod[[1]]))

# Keep the coefficient of wealth-quintile group 2 (presumably relative to
# reference group 0 -- confirm the factor's level order), rename it, and
# attach it to every row of the matching district in Ana1.
coeffs_w5 <- dplyr::filter(coeffs_csmoke_f, term == "hh_wealth_quintile_groups_district2")
coeffs_w5_only <- dplyr::select(coeffs_w5, ex_d_name_ind, estimate)
coeffs_w5_only <- rename(coeffs_w5_only, csmoke_f_coeff = estimate)
Ana1 <- dplyr::full_join(Ana1, coeffs_w5_only, by = "ex_d_name_ind")




```









```{r graphs: filter: one value/district }

#Ana1_DLHS_w_g<-Ana1

#only 1 value/district (for rural/urban 2)
# NOTE(review): the only visible assignment of Ana1_DLHS_w_g is commented out
# just above, so this relies on the object already existing in the session --
# confirm it carries the coefficients produced by the regression chunks.

# One row per district: the first row of each district, reduced to the
# coefficient columns and the district-level covariates used in the graphs.
Ana2 <- Ana1_DLHS_w_g %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(
    diab_f_coeff_log, htn_f_coeff_log, bmi_f_coeff_log, csmoke_f_coeff_log,
    diab_f_coeff, htn_f_coeff, bmi_f_coeff, csmoke_f_coeff,
    ed_att_new, PCI_districts, literacy_rate_female, urban_prop
  ) %>%
  dplyr::slice(1) %>%
  ungroup()

# One row per district x urban_lab combination, with the same coefficient
# columns plus medianai_r_u, urban_lab and urban.
Ana2_urban_rural <- Ana1_DLHS_w_g %>%
  group_by(ex_d_name_ind, urban_lab) %>%
  dplyr::slice(1) %>%
  dplyr::select(
    diab_f_coeff_log, htn_f_coeff_log, bmi_f_coeff_log, csmoke_f_coeff_log,
    diab_f_coeff, htn_f_coeff, bmi_f_coeff, csmoke_f_coeff,
    medianai_r_u, urban_lab, urban
  ) %>%
  ungroup()



```
###### Graphs
```{r graphs:educational attainment}

##Diabetes

#absolute difference

# Jittered scatter of diab_f_coeff (x100) against ed_att_new with a straight
# least-squares trend line; the y axis is labelled in percentage points.
# Note: scale limits drop out-of-range rows before geom_smooth() fits, whereas
# `mod` below is estimated without those limits.
fig <- ggplot(Ana2, aes(x = ed_att_new, y = diab_f_coeff * 100)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "gray48", size = 0.7) +
  theme_classic() +
  labs(
    x = "% of participants in a district who completed primary education",
    y = "Difference in percentage points",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(breaks = seq(-20, 20, by = 10), limits = c(-33, 33)) +
  scale_x_continuous(breaks = seq(25, 100, by = 25), limits = c(15, 110)) +
  coord_fixed(ratio = 12 / 10, expand = FALSE)

print(fig)

# Same relationship as a plain linear model; the summary is printed.
mod <- lm((diab_f_coeff * 100) ~ ed_att_new, data = Ana2)
summary(mod)

#odds ratio

# Jittered scatter of diab_f_coeff_log against ed_att_new with a least-squares
# trend line. The y ticks sit at roughly log(0.05), log(0.2), log(1), log(5)
# and log(20) and are labelled with the corresponding odds ratios.
# Note: scale limits drop out-of-range rows before geom_smooth() fits, whereas
# `mod` below is estimated without those limits.
fig <- ggplot(Ana2, aes(x = ed_att_new, y = diab_f_coeff_log)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "gray48", size = 0.7) +
  theme_classic() +
  labs(
    x = "% of participants in a district who completed primary education",
    y = "Odds Ratio",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(
    breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
    limits = c(-5.3, 5.3),
    labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1", "1.609" = "5", "2.996" = "20")
  ) +
  scale_x_continuous(breaks = seq(25, 100, by = 25), limits = c(15, 110)) +
  coord_fixed(ratio = 15 / 2, expand = FALSE)

print(fig)

# Same relationship as a plain linear model; the summary is printed.
mod <- lm((diab_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)



##hypertension

#absolute difference
# Jittered scatter of htn_f_coeff (x100) against ed_att_new with a straight
# least-squares trend line; the y axis is labelled in percentage points.
# Note: scale limits drop out-of-range rows before geom_smooth() fits, whereas
# `mod` below is estimated without those limits.
fig <- ggplot(Ana2, aes(x = ed_att_new, y = htn_f_coeff * 100)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "gray48", size = 0.7) +
  theme_classic() +
  labs(
    x = "% of participants in a district who completed primary education",
    y = "Difference in percentage points",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(breaks = seq(-20, 20, by = 10), limits = c(-33, 33)) +
  scale_x_continuous(breaks = seq(25, 100, by = 25), limits = c(15, 110)) +
  coord_fixed(ratio = 12 / 10, expand = FALSE)
print(fig)

# Same relationship as a plain linear model; the summary is printed.
mod <- lm((htn_f_coeff * 100) ~ ed_att_new, data = Ana2)
summary(mod)

#odds ratio

# Jittered scatter of htn_f_coeff_log against ed_att_new with a least-squares
# trend line. The y ticks sit at roughly log(0.05), log(0.2), log(1), log(5)
# and log(20) and are labelled with the corresponding odds ratios.
# Note: scale limits drop out-of-range rows before geom_smooth() fits, whereas
# `mod` below is estimated without those limits.
fig <- ggplot(Ana2, aes(x = ed_att_new, y = htn_f_coeff_log)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "gray48", size = 0.7) +
  theme_classic() +
  labs(
    x = "% of participants in a district who completed primary education",
    y = "Odds Ratio",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(
    breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
    limits = c(-5.3, 5.3),
    labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1", "1.609" = "5", "2.996" = "20")
  ) +
  scale_x_continuous(breaks = seq(25, 100, by = 25), limits = c(15, 110)) +
  coord_fixed(ratio = 15 / 2, expand = FALSE)
print(fig)

# Same relationship as a plain linear model; the summary is printed.
mod <- lm((htn_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)



##bmi

#absolute difference
# Jittered scatter of bmi_f_coeff (x100) against ed_att_new with a straight
# least-squares trend line; the y axis is labelled in percentage points.
# Note: scale limits drop out-of-range rows before geom_smooth() fits, whereas
# `mod` below is estimated without those limits.
fig <- ggplot(Ana2, aes(x = ed_att_new, y = bmi_f_coeff * 100)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "gray48", size = 0.7) +
  theme_classic() +
  labs(
    x = "% of participants in a district who completed primary education",
    y = "Difference in percentage points",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(breaks = seq(-20, 20, by = 10), limits = c(-33, 33)) +
  scale_x_continuous(breaks = seq(25, 100, by = 25), limits = c(15, 110)) +
  coord_fixed(ratio = 12 / 10, expand = FALSE)
print(fig)

# Same relationship as a plain linear model; the summary is printed.
mod <- lm((bmi_f_coeff * 100) ~ ed_att_new, data = Ana2)
summary(mod)

#odds ratio

# Jittered scatter of bmi_f_coeff_log against ed_att_new with a least-squares
# trend line. The y ticks sit at roughly log(0.05), log(0.2), log(1), log(5)
# and log(20) and are labelled with the corresponding odds ratios.
# Note: scale limits drop out-of-range rows before geom_smooth() fits, whereas
# `mod` below is estimated without those limits.
fig <- ggplot(Ana2, aes(x = ed_att_new, y = bmi_f_coeff_log)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "gray48", size = 0.7) +
  theme_classic() +
  labs(
    x = "% of participants in a district who completed primary education",
    y = "Odds Ratio",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(
    breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
    limits = c(-5.3, 5.3),
    labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1", "1.609" = "5", "2.996" = "20")
  ) +
  scale_x_continuous(breaks = seq(25, 100, by = 25), limits = c(15, 110)) +
  coord_fixed(ratio = 15 / 2, expand = FALSE)
print(fig)

# Same relationship as a plain linear model; the summary is printed.
mod <- lm((bmi_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


##csmoke

#absolute difference

# Jittered scatter of csmoke_f_coeff (x100) against ed_att_new with a straight
# least-squares trend line; the y axis is labelled in percentage points.
# Note: scale limits drop out-of-range rows before geom_smooth() fits, whereas
# `mod` below is estimated without those limits.
fig <- ggplot(Ana2, aes(x = ed_att_new, y = csmoke_f_coeff * 100)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "gray48", size = 0.7) +
  theme_classic() +
  labs(
    x = "% of participants in a district who completed primary education",
    y = "Difference in percentage points",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(breaks = seq(-20, 20, by = 10), limits = c(-33, 33)) +
  scale_x_continuous(breaks = seq(25, 100, by = 25), limits = c(15, 110)) +
  coord_fixed(ratio = 12 / 10, expand = FALSE)
print(fig)

# Same relationship as a plain linear model; the summary is printed.
mod <- lm((csmoke_f_coeff * 100) ~ ed_att_new, data = Ana2)
summary(mod)

#odds ratio

# Jittered scatter of csmoke_f_coeff_log against ed_att_new with a
# least-squares trend line. The y ticks sit at roughly log(0.05), log(0.2),
# log(1), log(5) and log(20) and are labelled with the matching odds ratios.
# Note: scale limits drop out-of-range rows before geom_smooth() fits, whereas
# `mod` below is estimated without those limits.
fig <- ggplot(Ana2, aes(x = ed_att_new, y = csmoke_f_coeff_log)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "gray48", size = 0.7) +
  theme_classic() +
  labs(
    x = "% of participants in a district who completed primary education",
    y = "Odds Ratio",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(
    breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
    limits = c(-5.3, 5.3),
    labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1", "1.609" = "5", "2.996" = "20")
  ) +
  scale_x_continuous(breaks = seq(25, 100, by = 25), limits = c(15, 110)) +
  coord_fixed(ratio = 15 / 2, expand = FALSE)

print(fig)

# Same relationship as a plain linear model; the summary is printed.
mod <- lm((csmoke_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


```

```{r graphs:PCI_districts }


##Diabetes

#absolute difference

# Jittered scatter of diab_f_coeff (x100) against PCI_districts with a
# straight least-squares trend line; x ticks are abbreviated to 20k/40k/60k.
# Note: scale limits drop out-of-range rows before geom_smooth() fits, whereas
# `mod` below is estimated without those limits.
fig <- ggplot(Ana2, aes(x = PCI_districts, y = diab_f_coeff * 100)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "gray48", size = 0.7) +
  theme_classic() +
  labs(
    x = "GDP/capita (Rs)",
    y = "Difference in percentage points",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(breaks = seq(-20, 20, by = 10), limits = c(-33, 33)) +
  scale_x_continuous(
    breaks = seq(20000, 60000, by = 20000),
    limits = c(0, 80000),
    labels = c("20000" = "20k", "40000" = "40k", "60000" = "60k")
  ) +
  coord_fixed(ratio = 1 / 0.0010, expand = FALSE)

print(fig)

# Same relationship as a plain linear model; the summary is printed.
mod <- lm((diab_f_coeff * 100) ~ PCI_districts, data = Ana2)
summary(mod)

#odds ratio 

# Jittered scatter of diab_f_coeff_log against PCI_districts with a
# least-squares trend line. The y ticks sit at roughly log(0.05), log(0.2),
# log(1), log(5) and log(20) and are labelled with the matching odds ratios;
# x ticks are abbreviated to 20k/40k/60k.
# Note: scale limits drop out-of-range rows before geom_smooth() fits, whereas
# `mod` below is estimated without those limits.
fig <- ggplot(Ana2, aes(x = PCI_districts, y = diab_f_coeff_log)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "gray48", size = 0.7) +
  theme_classic() +
  labs(
    x = "GDP/capita (Rs)",
    y = "Odds Ratio",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(
    breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
    limits = c(-5.3, 5.3),
    labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1", "1.609" = "5", "2.996" = "20")
  ) +
  scale_x_continuous(
    breaks = seq(20000, 60000, by = 20000),
    limits = c(0, 80000),
    labels = c("20000" = "20k", "40000" = "40k", "60000" = "60k")
  ) +
  coord_fixed(ratio = 1 / 0.00015, expand = FALSE)

print(fig)

# Same relationship as a plain linear model; the summary is printed.
mod <- lm((diab_f_coeff_log) ~ PCI_districts, data = Ana2)
summary(mod)



##hypertension

#absolute differences

# Jittered scatter of htn_f_coeff (x100) against PCI_districts with a straight
# least-squares trend line; x ticks are abbreviated to 20k/40k/60k.
# Note: scale limits drop out-of-range rows before geom_smooth() fits, whereas
# `mod` below is estimated without those limits.
fig <- ggplot(Ana2, aes(x = PCI_districts, y = htn_f_coeff * 100)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "gray48", size = 0.7) +
  theme_classic() +
  labs(
    x = "GDP/capita (Rs)",
    y = "Difference in percentage points",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(breaks = seq(-20, 20, by = 10), limits = c(-33, 33)) +
  scale_x_continuous(
    breaks = seq(20000, 60000, by = 20000),
    limits = c(0, 80000),
    labels = c("20000" = "20k", "40000" = "40k", "60000" = "60k")
  ) +
  coord_fixed(ratio = 1 / 0.0010, expand = FALSE)

print(fig)

# Same relationship as a plain linear model; the summary is printed.
mod <- lm((htn_f_coeff * 100) ~ PCI_districts, data = Ana2)
summary(mod)

#odds ratio

# Jittered scatter of htn_f_coeff_log against PCI_districts with a
# least-squares trend line. The y ticks sit at roughly log(0.05), log(0.2),
# log(1), log(5) and log(20) and are labelled with the matching odds ratios;
# x ticks are abbreviated to 20k/40k/60k.
# Note: scale limits drop out-of-range rows before geom_smooth() fits, whereas
# `mod` below is estimated without those limits.
fig <- ggplot(Ana2, aes(x = PCI_districts, y = htn_f_coeff_log)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "gray48", size = 0.7) +
  theme_classic() +
  labs(
    x = "GDP/capita (Rs)",
    y = "Odds Ratio",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(
    breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
    limits = c(-5.3, 5.3),
    labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1", "1.609" = "5", "2.996" = "20")
  ) +
  scale_x_continuous(
    breaks = seq(20000, 60000, by = 20000),
    limits = c(0, 80000),
    labels = c("20000" = "20k", "40000" = "40k", "60000" = "60k")
  ) +
  coord_fixed(ratio = 1 / 0.00015, expand = FALSE)

print(fig)

# Same relationship as a plain linear model; the summary is printed.
mod <- lm((htn_f_coeff_log) ~ PCI_districts, data = Ana2)
summary(mod)


##bmi

#absolute difference

# Jittered scatter of bmi_f_coeff (x100) against PCI_districts with a straight
# least-squares trend line; x ticks are abbreviated to 20k/40k/60k.
# Note: scale limits drop out-of-range rows before geom_smooth() fits, whereas
# `mod` below is estimated without those limits.
fig <- ggplot(Ana2, aes(x = PCI_districts, y = bmi_f_coeff * 100)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "gray48", size = 0.7) +
  theme_classic() +
  labs(
    x = "GDP/capita (Rs)",
    y = "Difference in percentage points",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  ) +
  scale_y_continuous(breaks = seq(-20, 20, by = 10), limits = c(-33, 33)) +
  scale_x_continuous(
    breaks = seq(20000, 60000, by = 20000),
    limits = c(0, 80000),
    labels = c("20000" = "20k", "40000" = "40k", "60000" = "60k")
  ) +
  coord_fixed(ratio = 1 / 0.0010, expand = FALSE)

print(fig)

# Same relationship as a plain linear model; the summary is printed.
mod <- lm((bmi_f_coeff * 100) ~ PCI_districts, data = Ana2)
summary(mod)

#odds ratio

fig<- Ana2 %>% 
  ggplot()+ geom_jitter(mapping=aes(y=bmi_f_coeff_log, x=PCI_districts),size=0.3)+
geom_smooth(mapping=aes(y=bmi_f_coeff_log, x=PCI_districts),method="lm",color="gray48",se=FALSE,size=0.7)+theme_classic()+
labs(x = "GDP/capita (Rs)",y="Odds Ratio",
       fill="") +
  theme(axis.text=element_text(size=22),
        axis.title.x = element_text(size=20, face="bold",margin = margin(t = 20)),
        axis.title.y = element_text(size=20, face="bold",margin = margin(r = 20)),
        #legend.text=element_text(size=10),
        #legend.title = element_text(size=8,face="bold"),
        #legend.key.size = unit(0.45,"cm"),
        plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
        strip.background = element_blank())+
  scale_y_continuous(breaks = c((-2.996),(-1.609),0,1.609,2.996),limits=c((-5.3),5.3),labels=c( "-2.996"="0.05","-1.609"="0.2","0"= "1","1.609"="5","2.996"="20"))+
  scale_x_continuous(breaks = c(20000, 40000,60000),limits=c(0,80000),labels=c( "20000"="20k","40000"="40k","60000"="60k"))+coord_fixed(ratio=1/0.00015, expand=F)

print(fig)

mod<-lm((bmi_f_coeff_log) ~PCI_districts,data=Ana2) 
summary(mod)


##csmoke

#absolute difference

fig<- Ana2 %>% 
  ggplot()+ geom_jitter(mapping=aes(y=csmoke_f_coeff*100, x=PCI_districts),size=0.3)+
geom_smooth(mapping=aes(y=csmoke_f_coeff*100, x=PCI_districts),method="lm",color="gray48",se=FALSE,size=0.7)+theme_classic()+
labs(x = "GDP/capita (Rs)",y="Difference in percentage points",
       fill="") +
  theme(axis.text=element_text(size=22),
        axis.title.x = element_text(size=20, face="bold",margin = margin(t = 20)),
        axis.title.y = element_text(size=20, face="bold",margin = margin(r = 20)),
        #legend.text=element_text(size=10),
        #legend.title = element_text(size=8,face="bold"),
        #legend.key.size = unit(0.45,"cm"),
        plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
        strip.background = element_blank())+
  scale_y_continuous(breaks = c((-20),(-10),0,10,20),limits=c((-33),33))+
  scale_x_continuous(breaks = c(20000, 40000,60000),limits=c(0,80000),labels=c( "20000"="20k","40000"="40k","60000"="60k"))+coord_fixed(ratio=1/0.0010, expand=F)

print(fig)

mod<-lm((csmoke_f_coeff*100) ~PCI_districts,data=Ana2) 
summary(mod)
 
#odds ratio

fig<- Ana2 %>% 
  ggplot()+ geom_jitter(mapping=aes(y=csmoke_f_coeff_log, x=PCI_districts),size=0.3)+
geom_smooth(mapping=aes(y=csmoke_f_coeff_log, x=PCI_districts),method="lm",color="gray48",se=FALSE,size=0.7)+theme_classic()+
labs(x = "GDP/capita (Rs)",y="Odds Ratio",
       fill="") +
  theme(axis.text=element_text(size=22),
        axis.title.x = element_text(size=20, face="bold",margin = margin(t = 20)),
        axis.title.y = element_text(size=20, face="bold",margin = margin(r = 20)),
        #legend.text=element_text(size=10),
        #legend.title = element_text(size=8,face="bold"),
        #legend.key.size = unit(0.45,"cm"),
        plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
        strip.background = element_blank())+
  scale_y_continuous(breaks = c((-2.996),(-1.609),0,1.609,2.996),limits=c((-5.3),5.3),labels=c( "-2.996"="0.05","-1.609"="0.2","0"= "1","1.609"="5","2.996"="20"))+
  scale_x_continuous(breaks = c(20000, 40000,60000),limits=c(0,80000),labels=c( "20000"="20k","40000"="40k","60000"="60k"))+coord_fixed(ratio=1/0.00015, expand=F)

print(fig)

mod<-lm((csmoke_f_coeff_log) ~PCI_districts,data=Ana2) 

summary(mod)



```

```{r graphs:Female literacy rate }


# Scatter plot (jittered points + linear fit) of a district-level coefficient
# against literacy_rate_female, drawn with the axis layout shared by all
# figures in this chunk.
#
# data        data frame holding the columns referenced in `mapping`
# mapping     aes() with `x` and `y`; inherited by both layers
# odds_ratio  FALSE: y axis in percentage points (breaks -20..20);
#             TRUE:  y axis on the natural-log scale whose tick labels show the
#                    exponentiated values (log(0.05) = -2.996, log(20) = 2.996)
#
# Returns the ggplot object; nothing is printed here.
#
# NOTE(review): the limits are set on the scales, so observations outside them
# are removed before geom_smooth() fits its line; the drawn line can therefore
# differ from the lm() fits reported below, which use all rows.
plot_coeff_by_literacy <- function(data, mapping, odds_ratio = FALSE) {
  if (odds_ratio) {
    y_title <- "Odds Ratio"
    y_scale <- scale_y_continuous(
      breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
      limits = c(-5.3, 5.3),
      labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1",
                 "1.609" = "5", "2.996" = "20")
    )
    aspect <- 15 / 2
  } else {
    y_title <- "Difference in percentage points"
    y_scale <- scale_y_continuous(
      breaks = c(-20, -10, 0, 10, 20),
      limits = c(-33, 33)
    )
    aspect <- 12 / 10
  }

  ggplot(data, mapping) +
    geom_jitter(size = 0.3) +
    geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
    theme_classic() +
    labs(x = "Female literacy rate", y = y_title, fill = "") +
    theme(
      axis.text = element_text(size = 22),
      axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
      axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
      plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
      strip.background = element_blank()
    ) +
    y_scale +
    scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(15, 110)) +
    coord_fixed(ratio = aspect, expand = FALSE)
}


## Diabetes

# absolute difference

fig <- plot_coeff_by_literacy(
  Ana2,
  aes(x = literacy_rate_female, y = diab_f_coeff * 100)
)
print(fig)

mod <- lm((diab_f_coeff * 100) ~ literacy_rate_female, data = Ana2)
summary(mod)

# odds ratio

fig <- plot_coeff_by_literacy(
  Ana2,
  aes(x = literacy_rate_female, y = diab_f_coeff_log),
  odds_ratio = TRUE
)
print(fig)

mod <- lm((diab_f_coeff_log) ~ literacy_rate_female, data = Ana2)
summary(mod)



## Hypertension

# absolute difference

fig <- plot_coeff_by_literacy(
  Ana2,
  aes(x = literacy_rate_female, y = htn_f_coeff * 100)
)
print(fig)

mod <- lm((htn_f_coeff * 100) ~ literacy_rate_female, data = Ana2)
summary(mod)

# odds ratio

fig <- plot_coeff_by_literacy(
  Ana2,
  aes(x = literacy_rate_female, y = htn_f_coeff_log),
  odds_ratio = TRUE
)
print(fig)

mod <- lm((htn_f_coeff_log) ~ literacy_rate_female, data = Ana2)
summary(mod)



## bmi

# absolute difference

fig <- plot_coeff_by_literacy(
  Ana2,
  aes(x = literacy_rate_female, y = bmi_f_coeff * 100)
)
print(fig)

mod <- lm((bmi_f_coeff * 100) ~ literacy_rate_female, data = Ana2)
summary(mod)

# odds ratio

fig <- plot_coeff_by_literacy(
  Ana2,
  aes(x = literacy_rate_female, y = bmi_f_coeff_log),
  odds_ratio = TRUE
)
print(fig)

mod <- lm((bmi_f_coeff_log) ~ literacy_rate_female, data = Ana2)
summary(mod)


## csmoke

# absolute difference

fig <- plot_coeff_by_literacy(
  Ana2,
  aes(x = literacy_rate_female, y = csmoke_f_coeff * 100)
)
print(fig)

mod <- lm((csmoke_f_coeff * 100) ~ literacy_rate_female, data = Ana2)
summary(mod)

# odds ratio

fig <- plot_coeff_by_literacy(
  Ana2,
  aes(x = literacy_rate_female, y = csmoke_f_coeff_log),
  odds_ratio = TRUE
)
print(fig)

mod <- lm((csmoke_f_coeff_log) ~ literacy_rate_female, data = Ana2)
summary(mod)


```



```{r graphs: Median household wealth }


# Separate urban (urban == 1) and rural (urban == 0) rows; the per-stratum
# regressions below are fitted on these subsets.
urban <- dplyr::filter(Ana2_urban_rural, urban == 1)
rural <- dplyr::filter(Ana2_urban_rural, urban == 0)

# Facet strip titles, looked up by the value of `urban_lab`
# (assumes urban_lab takes the values "urban" / "rural" -- TODO confirm).
labels <- c(urban = "Urban", rural = "Rural")

# Scatter plot (jittered points + linear fit) of a district-level coefficient
# against medianai_r_u, with one panel per value of `urban_lab`, drawn with the
# axis layout shared by all figures in this chunk.
#
# data          data frame holding `urban_lab` and the columns in `mapping`
# mapping       aes() with `x` and `y`; inherited by both layers
# facet_labels  named character vector passed to labeller() for `urban_lab`
# odds_ratio    FALSE: y axis in percentage points (breaks -20..20);
#               TRUE:  y axis on the natural-log scale whose tick labels show
#                      the exponentiated values (log(0.05) = -2.996, ...)
#
# Returns the ggplot object; nothing is printed here.
#
# The x-axis title is "Median household wealth" for every figure; the original
# copies alternated between that and "Median Household wealth".
#
# NOTE(review): the limits are set on the scales, so observations outside them
# are removed before geom_smooth() fits its line; the drawn lines can therefore
# differ from the lm() fits reported below, which use all rows of each subset.
plot_coeff_by_wealth <- function(data, mapping, facet_labels, odds_ratio = FALSE) {
  if (odds_ratio) {
    y_title <- "Odds Ratio"
    y_scale <- scale_y_continuous(
      breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
      limits = c(-5.3, 5.3),
      labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1",
                 "1.609" = "5", "2.996" = "20")
    )
    aspect <- 10 / 12
  } else {
    y_title <- "Difference in percentage points"
    y_scale <- scale_y_continuous(
      breaks = c(-20, -10, 0, 10, 20),
      limits = c(-33, 33)
    )
    aspect <- 1 / 8
  }

  ggplot(data, mapping) +
    geom_jitter(size = 0.3) +
    geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
    theme_classic() +
    labs(x = "Median household wealth", y = y_title, fill = "") +
    facet_wrap(~urban_lab, labeller = labeller(urban_lab = facet_labels)) +
    theme(
      axis.text = element_text(size = 22),
      axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
      axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
      panel.spacing = unit(2, "lines"),
      plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
      strip.text.x = element_text(size = 20, face = "bold"),
      strip.text.y = element_text(size = 20, face = "bold"),
      strip.background = element_blank()
    ) +
    y_scale +
    scale_x_continuous(breaks = c(-1.5, 0, 1.5), limits = c(-2.5, 2.5)) +
    coord_fixed(ratio = aspect, expand = FALSE)
}


## Diabetes

# absolute difference

fig <- plot_coeff_by_wealth(
  Ana2_urban_rural,
  aes(x = medianai_r_u, y = diab_f_coeff * 100),
  facet_labels = labels
)
print(fig)

mod_u <- lm((diab_f_coeff * 100) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((diab_f_coeff * 100) ~ medianai_r_u, data = rural)
summary(mod_r)

# odds ratio

fig <- plot_coeff_by_wealth(
  Ana2_urban_rural,
  aes(x = medianai_r_u, y = diab_f_coeff_log),
  facet_labels = labels,
  odds_ratio = TRUE
)
print(fig)

mod_u <- lm((diab_f_coeff_log) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((diab_f_coeff_log) ~ medianai_r_u, data = rural)
summary(mod_r)




## Hypertension

# absolute difference

fig <- plot_coeff_by_wealth(
  Ana2_urban_rural,
  aes(x = medianai_r_u, y = htn_f_coeff * 100),
  facet_labels = labels
)
print(fig)

mod_u <- lm((htn_f_coeff * 100) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((htn_f_coeff * 100) ~ medianai_r_u, data = rural)
summary(mod_r)

# odds ratio

fig <- plot_coeff_by_wealth(
  Ana2_urban_rural,
  aes(x = medianai_r_u, y = htn_f_coeff_log),
  facet_labels = labels,
  odds_ratio = TRUE
)
print(fig)

mod_u <- lm((htn_f_coeff_log) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((htn_f_coeff_log) ~ medianai_r_u, data = rural)
summary(mod_r)


## bmi

# absolute difference

fig <- plot_coeff_by_wealth(
  Ana2_urban_rural,
  aes(x = medianai_r_u, y = bmi_f_coeff * 100),
  facet_labels = labels
)
print(fig)

mod_u <- lm((bmi_f_coeff * 100) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((bmi_f_coeff * 100) ~ medianai_r_u, data = rural)
summary(mod_r)

# odds ratio

fig <- plot_coeff_by_wealth(
  Ana2_urban_rural,
  aes(x = medianai_r_u, y = bmi_f_coeff_log),
  facet_labels = labels,
  odds_ratio = TRUE
)
print(fig)

mod_u <- lm((bmi_f_coeff_log) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((bmi_f_coeff_log) ~ medianai_r_u, data = rural)
summary(mod_r)



## csmoke

# absolute difference

fig <- plot_coeff_by_wealth(
  Ana2_urban_rural,
  aes(x = medianai_r_u, y = csmoke_f_coeff * 100),
  facet_labels = labels
)
print(fig)

mod_u <- lm((csmoke_f_coeff * 100) ~ medianai_r_u, data = urban)
summary(mod_u)

# The rural fit is stored in mod_r; it was previously assigned to mod_u, which
# overwrote the urban model and left mod_r holding the bmi fit.
mod_r <- lm((csmoke_f_coeff * 100) ~ medianai_r_u, data = rural)
summary(mod_r)

# odds ratio

fig <- plot_coeff_by_wealth(
  Ana2_urban_rural,
  aes(x = medianai_r_u, y = csmoke_f_coeff_log),
  facet_labels = labels,
  odds_ratio = TRUE
)
print(fig)

mod_u <- lm((csmoke_f_coeff_log) ~ medianai_r_u, data = urban)
summary(mod_u)

mod_r <- lm((csmoke_f_coeff_log) ~ medianai_r_u, data = rural)
summary(mod_r)


```








```{r graphs: urban prop}


# Scatter plot (jittered points + linear fit) of a district-level coefficient
# against urban_prop, drawn with the axis layout shared by all figures in this
# chunk.
#
# data        data frame holding the columns referenced in `mapping`
# mapping     aes() with `x` and `y`; inherited by both layers
# odds_ratio  FALSE: y axis in percentage points (breaks -20..20);
#             TRUE:  y axis on the natural-log scale whose tick labels show the
#                    exponentiated values (log(0.05) = -2.996, log(20) = 2.996)
#
# Returns the ggplot object; nothing is printed here.
#
# NOTE(review): the limits are set on the scales, so observations outside them
# are removed before geom_smooth() fits its line; the drawn line can therefore
# differ from the lm() fits reported below, which use all rows.
plot_coeff_by_urban_prop <- function(data, mapping, odds_ratio = FALSE) {
  if (odds_ratio) {
    y_title <- "Odds Ratio"
    y_scale <- scale_y_continuous(
      breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
      limits = c(-5.3, 5.3),
      labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1",
                 "1.609" = "5", "2.996" = "20")
    )
    aspect <- 12 / 1.14
  } else {
    y_title <- "Difference in percentage points"
    y_scale <- scale_y_continuous(
      breaks = c(-20, -10, 0, 10, 20),
      limits = c(-33, 33)
    )
    aspect <- 12 / 7.5
  }

  ggplot(data, mapping) +
    geom_jitter(size = 0.3) +
    geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
    theme_classic() +
    labs(
      x = "% of participants who live in an urban area",
      y = y_title,
      fill = ""
    ) +
    theme(
      axis.text = element_text(size = 22),
      axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
      axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
      plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
      strip.background = element_blank()
    ) +
    y_scale +
    scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(0, 125)) +
    coord_fixed(ratio = aspect, expand = FALSE)
}


## Diabetes

# absolute difference

fig <- plot_coeff_by_urban_prop(Ana2, aes(x = urban_prop, y = diab_f_coeff * 100))
print(fig)

mod <- lm((diab_f_coeff * 100) ~ urban_prop, data = Ana2)
summary(mod)

# odds ratio

fig <- plot_coeff_by_urban_prop(
  Ana2,
  aes(x = urban_prop, y = diab_f_coeff_log),
  odds_ratio = TRUE
)
print(fig)

mod <- lm((diab_f_coeff_log) ~ urban_prop, data = Ana2)
summary(mod)




## Hypertension

# absolute difference

fig <- plot_coeff_by_urban_prop(Ana2, aes(x = urban_prop, y = htn_f_coeff * 100))
print(fig)

mod <- lm((htn_f_coeff * 100) ~ urban_prop, data = Ana2)
summary(mod)

# odds ratio

fig <- plot_coeff_by_urban_prop(
  Ana2,
  aes(x = urban_prop, y = htn_f_coeff_log),
  odds_ratio = TRUE
)
print(fig)

mod <- lm((htn_f_coeff_log) ~ urban_prop, data = Ana2)
summary(mod)




## bmi

# absolute difference

fig <- plot_coeff_by_urban_prop(Ana2, aes(x = urban_prop, y = bmi_f_coeff * 100))
print(fig)

mod <- lm((bmi_f_coeff * 100) ~ urban_prop, data = Ana2)
summary(mod)

# odds ratio

fig <- plot_coeff_by_urban_prop(
  Ana2,
  aes(x = urban_prop, y = bmi_f_coeff_log),
  odds_ratio = TRUE
)
print(fig)

mod <- lm((bmi_f_coeff_log) ~ urban_prop, data = Ana2)
summary(mod)




## csmoke

# absolute difference

fig <- plot_coeff_by_urban_prop(Ana2, aes(x = urban_prop, y = csmoke_f_coeff * 100))
print(fig)

mod <- lm((csmoke_f_coeff * 100) ~ urban_prop, data = Ana2)
summary(mod)

# odds ratio

fig <- plot_coeff_by_urban_prop(
  Ana2,
  aes(x = urban_prop, y = csmoke_f_coeff_log),
  odds_ratio = TRUE
)
print(fig)

mod <- lm((csmoke_f_coeff_log) ~ urban_prop, data = Ana2)
summary(mod)

```




# District-level regressions using wealth quintiles computed nationally: bottom two vs. top two quintiles

##### Linear and logistic regressions

```{r filter districts with no contrasts}


# Assemble the district-level analysis data set: start from the pooled
# DLHS/AHS data and keep only the columns used by the district regressions
# and plots further down.
Ana1 <- India_DLHS_AHS %>%
  dplyr::select(
    ex_d_name_ind,
    ex_diab_narrow_ind, ex_htn_narrow_ind, bmigrt27.5, csmoke,
    ex_diab_narrow_ind_dbl, ex_htn_narrow_ind_dbl, bmigrt27.5_dbl, csmoke_dbl,
    sex, age_grp, urban, urban_lab,
    wealth_quintile_rurb_groups,
    urban_prop, rural_prop,
    PCI_districts, literacy_rate_female, ed_att_new, medianai_r_u
  )

# Keep rows whose district has at least 5 for both the urban and the rural
# proportion (rows with a missing proportion are dropped by filter()).
Ana1 <- Ana1 %>%
  dplyr::filter(urban_prop >= 5, rural_prop >= 5)


# Drop districts with <50 observations in the low (0) or high (2) SES category

## Sanity check before filtering
test <- Ana1
test %>%
  group_by(ex_d_name_ind) %>%
  summarize(sum = n())
# 517 districts

# Observations per district and wealth group; keep only the small cells.
test <- Ana1 %>%
  group_by(ex_d_name_ind, wealth_quintile_rurb_groups) %>%
  summarize(sum = n()) %>%
  dplyr::filter(sum < 50)
test %>%
  dplyr::filter(wealth_quintile_rurb_groups == 0 | wealth_quintile_rurb_groups == 2) %>%
  summarize(sum = n())
# 16 districts


# 0/1 membership flags for wealth group 0 and wealth group 2
# (NA where the wealth group is missing).
Ana1 <- Ana1 %>%
  mutate(
    wq_0_o = ifelse(is.na(wealth_quintile_rurb_groups), NA,
                    ifelse(wealth_quintile_rurb_groups == 0, 1, 0)),
    wq_2_o = ifelse(is.na(wealth_quintile_rurb_groups), NA,
                    ifelse(wealth_quintile_rurb_groups == 2, 1, 0))
  )

# Per-district number of observations in each of the two groups, then keep
# only districts with at least 50 observations in both.
Ana1 <- Ana1 %>%
  group_by(ex_d_name_ind) %>%
  mutate(
    count_wq_0_o = sum(wq_0_o, na.rm = TRUE),
    count_wq_2_o = sum(wq_2_o, na.rm = TRUE)
  ) %>%
  dplyr::filter(count_wq_0_o >= 50 & count_wq_2_o >= 50) %>%
  ungroup()

## Check the number of remaining districts (one output row per district)
Ana1 %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(sum = n())
# 501



# Drop districts with too few cases of a CVD risk factor in the lower (0) and
# higher (2) SES category.

## Sanity check: cases per district and wealth group, and how many districts
## have fewer than 10 cases in group 2 / group 0 for each risk factor.
test <- Ana1
test %>%
  group_by(ex_d_name_ind) %>%
  summarize(sum = n())
# 501 districts
test <- test %>%
  group_by(ex_d_name_ind, wealth_quintile_rurb_groups) %>%
  summarize(
    diabetes_cases = sum(ex_diab_narrow_ind_dbl, na.rm = TRUE),
    hypertension_cases = sum(ex_htn_narrow_ind_dbl, na.rm = TRUE),
    obesity_cases = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases = sum(csmoke_dbl, na.rm = TRUE)
  )

test_d <- test %>% dplyr::filter(diabetes_cases < 10)
test_d %>%
  dplyr::filter(wealth_quintile_rurb_groups == 2) %>%
  summarize(sum = n())
# 88 districts
test_d %>%
  dplyr::filter(wealth_quintile_rurb_groups == 0) %>%
  summarize(sum = n())
# 47 districts

test_h <- test %>% dplyr::filter(hypertension_cases < 10)
test_h %>%
  dplyr::filter(wealth_quintile_rurb_groups == 2) %>%
  summarize(sum = n())
# 4 districts
test_h %>%
  dplyr::filter(wealth_quintile_rurb_groups == 0) %>%
  summarize(sum = n())
# 1 districts

test_o <- test %>% dplyr::filter(obesity_cases < 10)
test_o %>%
  dplyr::filter(wealth_quintile_rurb_groups == 2) %>%
  summarize(sum = n())
# 77 districts
test_o %>%
  dplyr::filter(wealth_quintile_rurb_groups == 0) %>%
  summarize(sum = n())
# 53 districts

test_c <- test %>% dplyr::filter(csmoke_cases < 10)
test_c %>%
  dplyr::filter(wealth_quintile_rurb_groups == 2) %>%
  summarize(sum = n())
# 62 districts
test_c %>%
  dplyr::filter(wealth_quintile_rurb_groups == 0) %>%
  summarize(sum = n())
# 5 districts


## Cases of diabetes/hypertension/obesity/csmoke per district in the higher
## SES category (wealth group 2)
case_list <- Ana1 %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(wealth_quintile_rurb_groups == 2) %>%
  summarize(
    diabetes_cases_2 = sum(ex_diab_narrow_ind_dbl, na.rm = TRUE),
    hypertension_cases_2 = sum(ex_htn_narrow_ind_dbl, na.rm = TRUE),
    obesity_cases_2 = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases_2 = sum(csmoke_dbl, na.rm = TRUE)
  )

## Attach the group-2 counts to one copy of Ana1 per risk factor
## (joined on the district identifier).

## diabetes
diabetes_list <- case_list %>% dplyr::select(ex_d_name_ind, diabetes_cases_2)
diabetes_analysis <- Ana1 %>% left_join(diabetes_list, by = "ex_d_name_ind")

## hypertension
hypertension_list <- case_list %>% dplyr::select(ex_d_name_ind, hypertension_cases_2)
hypertension_analysis <- Ana1 %>% left_join(hypertension_list, by = "ex_d_name_ind")

## obesity
obesity_list <- case_list %>% dplyr::select(ex_d_name_ind, obesity_cases_2)
obesity_analysis <- Ana1 %>% left_join(obesity_list, by = "ex_d_name_ind")

## csmoke
csmoke_list <- case_list %>% dplyr::select(ex_d_name_ind, csmoke_cases_2)
csmoke_analysis <- Ana1 %>% left_join(csmoke_list, by = "ex_d_name_ind")

# Check: the number of output rows is the number of districts with <10 cases
diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(diabetes_cases_2 < 10) %>%
  summarize(sum = n())
# 88 districts
hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(hypertension_cases_2 < 10) %>%
  summarize(sum = n())
# 4 districts
obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(obesity_cases_2 < 10) %>%
  summarize(sum = n())
# 77 districts
csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(csmoke_cases_2 < 10) %>%
  summarize(sum = n())
# 62 districts


## Cases of diabetes/hypertension/obesity/csmoke per district in the lower
## SES category (wealth group 0)
case_list <- Ana1 %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(wealth_quintile_rurb_groups == 0) %>%
  summarize(
    diabetes_cases_0 = sum(ex_diab_narrow_ind_dbl, na.rm = TRUE),
    hypertension_cases_0 = sum(ex_htn_narrow_ind_dbl, na.rm = TRUE),
    obesity_cases_0 = sum(bmigrt27.5_dbl, na.rm = TRUE),
    csmoke_cases_0 = sum(csmoke_dbl, na.rm = TRUE)
  )

## Attach the group-0 counts to the per-risk-factor data sets

## diabetes
diabetes_list <- case_list %>% dplyr::select(ex_d_name_ind, diabetes_cases_0)
diabetes_analysis <- diabetes_analysis %>%
  left_join(diabetes_list, by = "ex_d_name_ind")

## hypertension
hypertension_list <- case_list %>% dplyr::select(ex_d_name_ind, hypertension_cases_0)
hypertension_analysis <- hypertension_analysis %>%
  left_join(hypertension_list, by = "ex_d_name_ind")

## obesity
obesity_list <- case_list %>% dplyr::select(ex_d_name_ind, obesity_cases_0)
obesity_analysis <- obesity_analysis %>%
  left_join(obesity_list, by = "ex_d_name_ind")

## csmoke
csmoke_list <- case_list %>% dplyr::select(ex_d_name_ind, csmoke_cases_0)
csmoke_analysis <- csmoke_analysis %>%
  left_join(csmoke_list, by = "ex_d_name_ind")


# Check: the number of output rows is the number of districts with <10 cases
diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(diabetes_cases_0 < 10) %>%
  summarize(sum = n())
# 47 districts
hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(hypertension_cases_0 < 10) %>%
  summarize(sum = n())
# 1 districts
obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(obesity_cases_0 < 10) %>%
  summarize(sum = n())
# 53 districts
csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(csmoke_cases_0 < 10) %>%
  summarize(sum = n())
# 5 districts


# For each CVD risk factor, add up the cases in the lower and higher SES
# category and keep only districts where that sum is at least 20.

## diabetes
diabetes_analysis <- diabetes_analysis %>%
  mutate(sum_cases = diabetes_cases_0 + diabetes_cases_2) %>%
  dplyr::filter(sum_cases >= 20)

## hypertension
hypertension_analysis <- hypertension_analysis %>%
  mutate(sum_cases = hypertension_cases_0 + hypertension_cases_2) %>%
  dplyr::filter(sum_cases >= 20)

## obesity
obesity_analysis <- obesity_analysis %>%
  mutate(sum_cases = obesity_cases_0 + obesity_cases_2) %>%
  dplyr::filter(sum_cases >= 20)

## csmoke
csmoke_analysis <- csmoke_analysis %>%
  mutate(sum_cases = csmoke_cases_0 + csmoke_cases_2) %>%
  dplyr::filter(sum_cases >= 20)


## Check the number of districts that have not been dropped
## (one output row per remaining district)
diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(n = n())
# 466 -> 501-466=35 rows removed from graphs
hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(n = n())
# 501 -> 501-501=0 rows removed from graphs
obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(n = n())
# 448 -> 501-448=53 rows removed from graphs
csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  dplyr::select(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  summarize(n = n())
# 499 -> 501-499=2 rows removed from graphs







```


```{r Regression analysis district level glm}


# For each risk factor: fit one bayesglm() logistic model per district, tidy
# the fits with broom, keep the estimate of the term
# "wealth_quintile_rurb_groups2" (wealth group 2), and join it onto Ana1 by
# district under a *_coeff_log column name.

##### diabetes

analysis <- diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    ex_diab_narrow_ind ~ age_grp + urban + sex + wealth_quintile_rurb_groups,
    family = binomial(link = "logit"),
    data = .
  ))

# one row per district and model term
coeffs_diab_f <- analysis %>% tidy(mod)

# wealth-group-2 estimate per district
coeffs_w5 <- coeffs_diab_f %>%
  dplyr::filter(term == "wealth_quintile_rurb_groups2")
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  rename(diab_f_coeff_log = estimate)

Ana1 <- Ana1 %>% dplyr::full_join(coeffs_w5_only, by = "ex_d_name_ind")


##### hypertension

analysis <- hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    ex_htn_narrow_ind ~ age_grp + urban + sex + wealth_quintile_rurb_groups,
    family = binomial(link = "logit"),
    data = .
  ))

coeffs_htn_f <- analysis %>% tidy(mod)

coeffs_w5 <- coeffs_htn_f %>%
  dplyr::filter(term == "wealth_quintile_rurb_groups2")
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  rename(htn_f_coeff_log = estimate)

Ana1 <- Ana1 %>% dplyr::full_join(coeffs_w5_only, by = "ex_d_name_ind")


##### obesity

analysis <- obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    bmigrt27.5 ~ age_grp + urban + sex + wealth_quintile_rurb_groups,
    family = binomial(link = "logit"),
    data = .
  ))

coeffs_bmi_f <- analysis %>% tidy(mod)

coeffs_w5 <- coeffs_bmi_f %>%
  dplyr::filter(term == "wealth_quintile_rurb_groups2")
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  rename(bmi_f_coeff_log = estimate)

Ana1 <- Ana1 %>% dplyr::full_join(coeffs_w5_only, by = "ex_d_name_ind")


##### csmoke

analysis <- csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = bayesglm(
    csmoke ~ age_grp + urban + sex + wealth_quintile_rurb_groups,
    family = binomial(link = "logit"),
    data = .
  ))

coeffs_csmoke_f <- analysis %>% tidy(mod)

coeffs_w5 <- coeffs_csmoke_f %>%
  dplyr::filter(term == "wealth_quintile_rurb_groups2")
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  rename(csmoke_f_coeff_log = estimate)

Ana1 <- Ana1 %>% dplyr::full_join(coeffs_w5_only, by = "ex_d_name_ind")


```

```{r Regression analysis district level lm}


# For each risk factor: fit one lm() on the 0/1 (*_dbl) outcome per district,
# tidy the fits with broom, keep the estimate of the term
# "wealth_quintile_rurb_groups2" (wealth group 2), and join it onto Ana1 by
# district under a *_coeff column name.

##### diabetes

analysis <- diabetes_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    ex_diab_narrow_ind_dbl ~ age_grp + sex + urban + wealth_quintile_rurb_groups,
    data = .
  ))

# one row per district and model term
coeffs_diab_f <- analysis %>% tidy(mod)

# wealth-group-2 estimate per district
coeffs_w5 <- coeffs_diab_f %>%
  dplyr::filter(term == "wealth_quintile_rurb_groups2")
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  rename(diab_f_coeff = estimate)

Ana1 <- Ana1 %>% dplyr::full_join(coeffs_w5_only, by = "ex_d_name_ind")


##### hypertension

analysis <- hypertension_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    ex_htn_narrow_ind_dbl ~ age_grp + urban + sex + wealth_quintile_rurb_groups,
    data = .
  ))

coeffs_htn_f <- analysis %>% tidy(mod)

coeffs_w5 <- coeffs_htn_f %>%
  dplyr::filter(term == "wealth_quintile_rurb_groups2")
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  rename(htn_f_coeff = estimate)

Ana1 <- Ana1 %>% dplyr::full_join(coeffs_w5_only, by = "ex_d_name_ind")


###### obesity

analysis <- obesity_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    bmigrt27.5_dbl ~ age_grp + urban + sex + wealth_quintile_rurb_groups,
    data = .
  ))

coeffs_bmi_f <- analysis %>% tidy(mod)

coeffs_w5 <- coeffs_bmi_f %>%
  dplyr::filter(term == "wealth_quintile_rurb_groups2")
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  rename(bmi_f_coeff = estimate)

Ana1 <- Ana1 %>% dplyr::full_join(coeffs_w5_only, by = "ex_d_name_ind")


###### csmoke

analysis <- csmoke_analysis %>%
  group_by(ex_d_name_ind) %>%
  do(mod = lm(
    csmoke_dbl ~ age_grp + urban + sex + wealth_quintile_rurb_groups,
    data = .
  ))

coeffs_csmoke_f <- analysis %>% tidy(mod)

coeffs_w5 <- coeffs_csmoke_f %>%
  dplyr::filter(term == "wealth_quintile_rurb_groups2")
coeffs_w5_only <- coeffs_w5 %>%
  dplyr::select(ex_d_name_ind, estimate) %>%
  rename(csmoke_f_coeff = estimate)

Ana1 <- Ana1 %>% dplyr::full_join(coeffs_w5_only, by = "ex_d_name_ind")




```









```{r graphs: filter: one value/district }

# Snapshot of the district data with the wealth coefficients attached.
# The assignment used to be commented out, so a clean run failed with
# "object 'Ana1_DLHS_w_n_g' not found". Reuse a snapshot that is already in
# the workspace; otherwise take it from `Ana1`.
if (!exists("Ana1_DLHS_w_n_g")) {
  Ana1_DLHS_w_n_g <- Ana1
}

# One row per district: the first row of each district, restricted to the
# coefficient columns and the district-level indicators used in the plots.
# (select() on a grouped data frame also keeps the grouping column.)
Ana2 <- Ana1_DLHS_w_n_g %>%
  group_by(ex_d_name_ind) %>%
  dplyr::filter(row_number() == 1) %>%
  dplyr::select(
    diab_f_coeff_log, htn_f_coeff_log, bmi_f_coeff_log, csmoke_f_coeff_log,
    diab_f_coeff, htn_f_coeff, bmi_f_coeff, csmoke_f_coeff,
    ed_att_new, PCI_districts, literacy_rate_female, urban_prop
  ) %>%
  ungroup()

# One row per district and urban/rural label (first row of each combination).
Ana2_urban_rural <- Ana1_DLHS_w_n_g %>%
  group_by(ex_d_name_ind, urban_lab) %>%
  dplyr::select(
    diab_f_coeff_log, htn_f_coeff_log, bmi_f_coeff_log, csmoke_f_coeff_log,
    diab_f_coeff, htn_f_coeff, bmi_f_coeff, csmoke_f_coeff,
    medianai_r_u, urban_lab, urban
  ) %>%
  dplyr::filter(row_number() == 1) %>%
  ungroup()



```

###### Graphs

```{r graphs:educational attainment}

# District-level wealth coefficient plotted against educational attainment
# (ed_att_new), one pair of figures per risk factor: the lm() coefficient
# scaled by 100 ("absolute difference") and the logistic coefficient on the
# log scale with odds-ratio tick labels. Each figure is followed by the
# matching lm() of the coefficient on ed_att_new.
#
# NOTE(review): the y limits are set on the scale, so points outside them are
# removed before geom_smooth() fits its line; the drawn line can therefore
# differ from the lm() summary printed below each figure. Confirm this is
# intended.

# Components shared by all eight figures
ed_att_x_label <- "% of participants in a district who completed primary education"

ed_att_theme <- list(
  theme_classic(),
  theme(
    axis.text = element_text(size = 22),
    axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 20)),
    axis.title.y = element_text(size = 20, face = "bold", margin = margin(r = 20)),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    strip.background = element_blank()
  )
)

# Axes for the "absolute difference" figures
ed_att_abs_axes <- list(
  scale_y_continuous(breaks = c(-20, -10, 0, 10, 20), limits = c(-33, 33)),
  scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(15, 110)),
  coord_fixed(ratio = 12 / 10, expand = FALSE)
)

# Axes for the "odds ratio" figures: breaks are on the log scale, tick labels
# show the corresponding odds ratios.
ed_att_or_axes <- list(
  scale_y_continuous(
    breaks = c(-2.996, -1.609, 0, 1.609, 2.996),
    limits = c(-5.3, 5.3),
    labels = c("-2.996" = "0.05", "-1.609" = "0.2", "0" = "1", "1.609" = "5", "2.996" = "20")
  ),
  scale_x_continuous(breaks = c(25, 50, 75, 100), limits = c(15, 110)),
  coord_fixed(ratio = 15 / 2, expand = FALSE)
)


## Diabetes

# absolute difference
fig <- ggplot(Ana2, aes(x = ed_att_new, y = diab_f_coeff * 100)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
  ed_att_theme +
  labs(x = ed_att_x_label, y = "Difference in percentage points", fill = "") +
  ed_att_abs_axes
print(fig)

mod <- lm((diab_f_coeff * 100) ~ ed_att_new, data = Ana2)
summary(mod)

# odds ratio
fig <- ggplot(Ana2, aes(x = ed_att_new, y = diab_f_coeff_log)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
  ed_att_theme +
  labs(x = ed_att_x_label, y = "Odds Ratio", fill = "") +
  ed_att_or_axes
print(fig)

mod <- lm((diab_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


## Hypertension

# absolute difference
fig <- ggplot(Ana2, aes(x = ed_att_new, y = htn_f_coeff * 100)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
  ed_att_theme +
  labs(x = ed_att_x_label, y = "Difference in percentage points", fill = "") +
  ed_att_abs_axes
print(fig)

mod <- lm((htn_f_coeff * 100) ~ ed_att_new, data = Ana2)
summary(mod)

# odds ratio
fig <- ggplot(Ana2, aes(x = ed_att_new, y = htn_f_coeff_log)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
  ed_att_theme +
  labs(x = ed_att_x_label, y = "Odds Ratio", fill = "") +
  ed_att_or_axes
print(fig)

mod <- lm((htn_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


## BMI

# absolute difference
fig <- ggplot(Ana2, aes(x = ed_att_new, y = bmi_f_coeff * 100)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
  ed_att_theme +
  labs(x = ed_att_x_label, y = "Difference in percentage points", fill = "") +
  ed_att_abs_axes
print(fig)

mod <- lm((bmi_f_coeff * 100) ~ ed_att_new, data = Ana2)
summary(mod)

# odds ratio
fig <- ggplot(Ana2, aes(x = ed_att_new, y = bmi_f_coeff_log)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
  ed_att_theme +
  labs(x = ed_att_x_label, y = "Odds Ratio", fill = "") +
  ed_att_or_axes
print(fig)

mod <- lm((bmi_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


## Current smoking

# absolute difference
fig <- ggplot(Ana2, aes(x = ed_att_new, y = csmoke_f_coeff * 100)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
  ed_att_theme +
  labs(x = ed_att_x_label, y = "Difference in percentage points", fill = "") +
  ed_att_abs_axes
print(fig)

mod <- lm((csmoke_f_coeff * 100) ~ ed_att_new, data = Ana2)
summary(mod)

# odds ratio
fig <- ggplot(Ana2, aes(x = ed_att_new, y = csmoke_f_coeff_log)) +
  geom_jitter(size = 0.3) +
  geom_smooth(method = "lm", color = "gray48", se = FALSE, size = 0.7) +
  ed_att_theme +
  labs(x = ed_att_x_label, y = "Odds Ratio", fill = "") +
  ed_att_or_axes
print(fig)

mod <- lm((csmoke_f_coeff_log) ~ ed_att_new, data = Ana2)
summary(mod)


```






















# Multilevel Models


```{r select variables for multilevel modeling }

# Working data set for the multilevel models: district identifier, the 0/1
# outcomes, individual-level covariates, wealth measures, and the
# district-level indicators.
multilevel <- India_DLHS_AHS %>%
  dplyr::select(
    ex_d_name_ind,
    ex_diab_narrow_ind_dbl, ex_diab_narrow_ind_ahs_unfasted_dbl,
    ex_htn_narrow_ind_dbl, bmigrt27.5_dbl, csmoke_dbl,
    sex, age_grp, urban, ed_5,
    hh_wealth_quintile_district, hh_wealth_quintile_district_c,
    asset_index_combined, wealth_quintile_rurb,
    urban_prop, rural_prop,
    PCI_districts, literacy_rate_female, ed_att_new, medianai
  )

# Mixed-model packages are attached here rather than with the other packages
# at the top of the file.
library(broom.mixed) # tidy() for mixed models
library(lme4)        # multilevel modelling
library(lmerTest)    # p-values for lmer()

```

```{r convert categorical varibales with >1 level in binary variables& perform Grand Mean Centering for continuous varibales}


# Recode `multilevel` for the mixed models:
#   * one 0/1 indicator column per level of the categorical covariates
#     (NA where the source value is missing);
#   * district-level continuous indicators centred on their mean and divided
#     by two standard deviations.

# 0/1 indicator for `x == level`; NA where `x` is missing.
as_indicator <- function(x, level) {
  ifelse(is.na(x), NA, ifelse(x == level, 1, 0))
}

# Centre on the mean and divide by two standard deviations
# (missing values are ignored when computing the mean and the SD).
scale_by_two_sd <- function(x) {
  (x - mean(x, na.rm = TRUE)) / (2 * sd(x, na.rm = TRUE))
}


##################### individual-level variables

# age group
multilevel <- mutate(
  multilevel,
  agr_15_19 = as_indicator(age_grp, "15-19"),
  agr_20_24 = as_indicator(age_grp, "20-24"),
  agr_25_29 = as_indicator(age_grp, "25-29"),
  agr_30_34 = as_indicator(age_grp, "30-34"),
  agr_35_39 = as_indicator(age_grp, "35-39"),
  agr_40_44 = as_indicator(age_grp, "40-44"),
  agr_45_49 = as_indicator(age_grp, "45-49"),
  agr_50_54 = as_indicator(age_grp, "50-54"),
  agr_55_59 = as_indicator(age_grp, "55-59"),
  agr_60_64 = as_indicator(age_grp, "60-64"),
  agr_65 = as_indicator(age_grp, ">65")
)

# household wealth quintile computed within district
multilevel <- mutate(
  multilevel,
  wq_1 = as_indicator(hh_wealth_quintile_district, 1),
  wq_2 = as_indicator(hh_wealth_quintile_district, 2),
  wq_3 = as_indicator(hh_wealth_quintile_district, 3),
  wq_4 = as_indicator(hh_wealth_quintile_district, 4),
  wq_5 = as_indicator(hh_wealth_quintile_district, 5)
)

# household wealth quintile computed nationally
multilevel <- mutate(
  multilevel,
  wq_1_n = as_indicator(wealth_quintile_rurb, 1),
  wq_2_n = as_indicator(wealth_quintile_rurb, 2),
  wq_3_n = as_indicator(wealth_quintile_rurb, 3),
  wq_4_n = as_indicator(wealth_quintile_rurb, 4),
  wq_5_n = as_indicator(wealth_quintile_rurb, 5)
)

# education (indicators for ed_5 values 1, 3, 4, 5 and 6; no indicator is
# created for value 2)
multilevel <- mutate(
  multilevel,
  ed_1_o = as_indicator(ed_5, 1),
  ed_3_o = as_indicator(ed_5, 3),
  ed_4_o = as_indicator(ed_5, 4),
  ed_5_o = as_indicator(ed_5, 5),
  ed_6_o = as_indicator(ed_5, 6)
)

# sex and urban: copied unchanged under the *_center names used in the model
# formulas (no centring or scaling is applied to them)
multilevel <- mutate(multilevel, sex_center = sex)
multilevel <- mutate(multilevel, urban_center = urban)


############### district-level variables
# NOTE(review): mean and SD are computed over the rows of `multilevel`
# (one row per participant, presumably), not over one row per district.
# Confirm this weighting is intended.

# district wealth
multilevel <- mutate(multilevel, medianai_center = scale_by_two_sd(medianai))

# proportion urban
multilevel <- mutate(multilevel, urban_prop_center = scale_by_two_sd(urban_prop))

# educational attainment
multilevel <- mutate(multilevel, ed_att_new_center = scale_by_two_sd(ed_att_new))

# PCI_districts: convert to numeric first. A factor must go through
# as.character(), because as.numeric() on a factor returns the internal level
# codes rather than the recorded values.
pci_raw <- multilevel$PCI_districts
if (is.factor(pci_raw)) {
  pci_raw <- as.character(pci_raw)
}
multilevel$PCI_districts <- as.numeric(pci_raw)
multilevel <- mutate(multilevel, PCI_district_center = scale_by_two_sd(PCI_districts))

# female literacy rate
multilevel <- mutate(
  multilevel,
  literacy_rate_female_center = scale_by_two_sd(literacy_rate_female)
)





```

```{r Correlation of district level indicators}

# Pairwise association of the scaled district-level indicators: for every
# ordered pair, regress one indicator on the other with lm() and write the
# tidied coefficients (with confidence intervals) to a CSV file.

# One row per district: first occurrence of each district identifier.
districts_indicator_test <- multilevel %>%
  dplyr::select(
    ex_d_name_ind,
    literacy_rate_female_center, PCI_district_center, ed_att_new_center,
    urban_prop_center, medianai_center
  )
districts_indicator_test <-
  districts_indicator_test[!duplicated(districts_indicator_test$ex_d_name_ind), ]

# Output file -> model formula, listed in the order the models are fitted.
indicator_models <- c(
  #### educational attainment ####
  "mod_edatt_med_DLHS_AHS_.csv" = "ed_att_new_center ~ medianai_center",
  "mod_edatt_pci_DLHS_AHS_.csv" = "ed_att_new_center ~ PCI_district_center",
  "mod_edatt_urban_DLHS_AHS_.csv" = "ed_att_new_center ~ urban_prop_center",
  "mod_edatt_lf_DLHS_AHS_.csv" = "ed_att_new_center ~ literacy_rate_female_center",
  #### medianai ####
  "mod_med_edatt_DLHS_AHS_.csv" = "medianai_center ~ ed_att_new_center",
  "mod_med_pci_DLHS_AHS_.csv" = "medianai_center ~ PCI_district_center",
  "mod_med_urban_DLHS_AHS_.csv" = "medianai_center ~ urban_prop_center",
  "mod_med_lf_DLHS_AHS_.csv" = "medianai_center ~ literacy_rate_female_center",
  #### PCI districts ####
  "mod_pci_edatt_DLHS_AHS_.csv" = "PCI_district_center ~ ed_att_new_center",
  "mod_pci_med_DLHS_AHS_.csv" = "PCI_district_center ~ medianai_center",
  "mod_pci_urban_DLHS_AHS_.csv" = "PCI_district_center ~ urban_prop_center",
  "mod_pci_lf_DLHS_AHS_.csv" = "PCI_district_center ~ literacy_rate_female_center",
  #### urban prop ####
  "mod_urban_edatt_DLHS_AHS_.csv" = "urban_prop_center ~ ed_att_new_center",
  "mod_urban_med_DLHS_AHS_.csv" = "urban_prop_center ~ medianai_center",
  "mod_urban_pci_DLHS_AHS_.csv" = "urban_prop_center ~ PCI_district_center",
  "mod_urban_lf_DLHS_AHS_.csv" = "urban_prop_center ~ literacy_rate_female_center",
  #### literacy rate female ####
  "mod_fl_edatt_DLHS_AHS_.csv" = "literacy_rate_female_center ~ ed_att_new_center",
  "mod_fl_med_DLHS_AHS_.csv" = "literacy_rate_female_center ~ medianai_center",
  "mod_fl_pci_DLHS_AHS_.csv" = "literacy_rate_female_center ~ PCI_district_center",
  "mod_lf_urban_DLHS_AHS_.csv" = "literacy_rate_female_center ~ urban_prop_center"
)

# Fit, tidy and export each model; `mod` and `coeffs` hold the last model
# after the loop.
for (csv_name in names(indicator_models)) {
  mod <- lm(as.formula(indicator_models[[csv_name]]), data = districts_indicator_test)
  coeffs <- mod %>% tidy(conf.int = TRUE, method = "Wald")
  write.csv(coeffs, csv_name)
}



```










###### Multilevel model: wealth interaction (quintiles computed for each district)



```{r level 1}




# Level-1 models: random-intercept linear models (intercept varying by
# district, ex_d_name_ind) of each 0/1 outcome on the age-group, district
# wealth-quintile, urban and sex covariates. For every outcome the
# fixed-effect table (estimates, Wald confidence intervals, p-values) is
# written to a CSV file.

# Fixed-effect rows of the tidied model. Rows are selected by the `effect`
# column rather than by a hard-coded row range, so the table stays correct if
# the number of fixed effects changes (e.g. a column dropped for rank
# deficiency).
level1_fixed_effects <- function(model) {
  model %>%
    tidy(conf.int = TRUE, conf.method = "Wald") %>%
    dplyr::filter(effect == "fixed")
}

# p-values from the model summary's coefficient table, taken by column name
# instead of by position. Requires lmer() from lmerTest, whose summary
# provides this column.
level1_p_values <- function(model) {
  coef(summary(model))[, "Pr(>|t|)"]
}


# diabetes
diab_all_lmer <- lmer(
  formula = ex_diab_narrow_ind_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 +
    agr_65 + wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_diab_all <- level1_fixed_effects(diab_all_lmer)
coeffs_diab_1 <- mutate(coeffs_diab_all, p.value = level1_p_values(diab_all_lmer))

write.csv(coeffs_diab_1, "DLHS_4_AHS_diab_1.csv")


# hypertension
htn_all_lmer <- lmer(
  formula = ex_htn_narrow_ind_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 +
    agr_65 + wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_htn_all <- level1_fixed_effects(htn_all_lmer)
coeffs_htn_1 <- mutate(coeffs_htn_all, p.value = level1_p_values(htn_all_lmer))

write.csv(coeffs_htn_1, "DLHS_4_AHS_htn_1.csv")


# bmi
bmi_all_lmer <- lmer(
  formula = bmigrt27.5_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 +
    agr_65 + wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_bmi_all <- level1_fixed_effects(bmi_all_lmer)
coeffs_bmi_1 <- mutate(coeffs_bmi_all, p.value = level1_p_values(bmi_all_lmer))

write.csv(coeffs_bmi_1, "DLHS_4_AHS_bmi_1.csv")


# csmoke
csmoke_all_lmer <- lmer(
  formula = csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 +
    agr_65 + wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)

coeffs_csmoke_all <- level1_fixed_effects(csmoke_all_lmer)
coeffs_csmoke_1 <- mutate(coeffs_csmoke_all, p.value = level1_p_values(csmoke_all_lmer))

write.csv(coeffs_csmoke_1, "DLHS_4_AHS_csmoke_1.csv")








```










```{r lmer analysis diabetes}



# Diabetes (ex_diab_narrow_ind_dbl): level-1 covariates plus one district-level
# covariate and its interactions with the wealth-quintile indicators
# wq_2..wq_5; random intercept for ex_d_name_ind. For every model:
#   - tidy() with Wald confidence intervals, keep the first 22 rows
#     (presumably the fixed-effect rows -- TODO confirm),
#   - set p.value from column 5 of the summary coefficient table (presumably
#     Pr(>|t|) as provided by lmerTest -- TODO confirm),
#   - write the table to CSV.
# diab_all_lmer / coeffs_diab_all are reused and end up holding the last model.

# median asset index (medianai_center)
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    medianai_center +
    medianai_center:wq_2 + medianai_center:wq_3 +
    medianai_center:wq_4 + medianai_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_diab_all <- coeffs_diab_all[seq_len(22), ]
coeffs_diab_medi <- coeffs_diab_all %>%
  mutate(p.value = coef(summary(diab_all_lmer))[, 5])
write.csv(coeffs_diab_medi, "DLHS_4_AHS_diab_medianai_center.csv")

# PCI (PCI_district_center)
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:wq_2 + PCI_district_center:wq_3 +
    PCI_district_center:wq_4 + PCI_district_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_diab_all <- coeffs_diab_all[seq_len(22), ]
coeffs_diab_pci <- coeffs_diab_all %>%
  mutate(p.value = coef(summary(diab_all_lmer))[, 5])
write.csv(coeffs_diab_pci, "DLHS_4_AHS_diab_pci.csv")

# educational attainment (ed_att_new_center)
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:wq_2 + ed_att_new_center:wq_3 +
    ed_att_new_center:wq_4 + ed_att_new_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_diab_all <- coeffs_diab_all[seq_len(22), ]
coeffs_diab_ed_att_new <- coeffs_diab_all %>%
  mutate(p.value = coef(summary(diab_all_lmer))[, 5])
write.csv(coeffs_diab_ed_att_new, "DLHS_4_AHS_diab_ed_att_new.csv")

# urban proportion (urban_prop_center)
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:wq_2 + urban_prop_center:wq_3 +
    urban_prop_center:wq_4 + urban_prop_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_diab_all <- coeffs_diab_all[seq_len(22), ]
coeffs_diab_urban_prop <- coeffs_diab_all %>%
  mutate(p.value = coef(summary(diab_all_lmer))[, 5])
write.csv(coeffs_diab_urban_prop, "DLHS_4_AHS_diab_urban_prop.csv")

# female literacy rate (literacy_rate_female_center)
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:wq_2 + literacy_rate_female_center:wq_3 +
    literacy_rate_female_center:wq_4 + literacy_rate_female_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_diab_all <- coeffs_diab_all[seq_len(22), ]
coeffs_diab_literacy_rate_female <- coeffs_diab_all %>%
  mutate(p.value = coef(summary(diab_all_lmer))[, 5])
write.csv(coeffs_diab_literacy_rate_female, "DLHS_4_AHS_diab_literacy_rate_female.csv")





```





```{r lmer analysis hypertension}

# Hypertension (ex_htn_narrow_ind_dbl): level-1 covariates plus one
# district-level covariate and its interactions with the wealth-quintile
# indicators wq_2..wq_5; random intercept for ex_d_name_ind. For every model:
#   - tidy() with Wald confidence intervals, keep the first 22 rows
#     (presumably the fixed-effect rows -- TODO confirm),
#   - set p.value from column 5 of the summary coefficient table (presumably
#     Pr(>|t|) as provided by lmerTest -- TODO confirm),
#   - write the table to CSV.
# htn_all_lmer / coeffs_htn_all are reused and end up holding the last model.

# median asset index (medianai_center)
htn_all_lmer <- lmer(
  ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    medianai_center +
    medianai_center:wq_2 + medianai_center:wq_3 +
    medianai_center:wq_4 + medianai_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_htn_all <- coeffs_htn_all[seq_len(22), ]
coeffs_htn_medi <- coeffs_htn_all %>%
  mutate(p.value = coef(summary(htn_all_lmer))[, 5])
write.csv(coeffs_htn_medi, "DLHS_4_AHS_htn_medianai_center.csv")

# PCI (PCI_district_center)
htn_all_lmer <- lmer(
  ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:wq_2 + PCI_district_center:wq_3 +
    PCI_district_center:wq_4 + PCI_district_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_htn_all <- coeffs_htn_all[seq_len(22), ]
coeffs_htn_pci <- coeffs_htn_all %>%
  mutate(p.value = coef(summary(htn_all_lmer))[, 5])
write.csv(coeffs_htn_pci, "DLHS_4_AHS_htn_pci.csv")

# educational attainment (ed_att_new_center)
htn_all_lmer <- lmer(
  ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:wq_2 + ed_att_new_center:wq_3 +
    ed_att_new_center:wq_4 + ed_att_new_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_htn_all <- coeffs_htn_all[seq_len(22), ]
coeffs_htn_ed_att_new <- coeffs_htn_all %>%
  mutate(p.value = coef(summary(htn_all_lmer))[, 5])
write.csv(coeffs_htn_ed_att_new, "DLHS_4_AHS_htn_ed_att_new.csv")

# urban proportion (urban_prop_center)
htn_all_lmer <- lmer(
  ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:wq_2 + urban_prop_center:wq_3 +
    urban_prop_center:wq_4 + urban_prop_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_htn_all <- coeffs_htn_all[seq_len(22), ]
coeffs_htn_urban_prop <- coeffs_htn_all %>%
  mutate(p.value = coef(summary(htn_all_lmer))[, 5])
write.csv(coeffs_htn_urban_prop, "DLHS_4_AHS_htn_urban_prop.csv")

# female literacy rate (literacy_rate_female_center)
htn_all_lmer <- lmer(
  ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:wq_2 + literacy_rate_female_center:wq_3 +
    literacy_rate_female_center:wq_4 + literacy_rate_female_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_htn_all <- coeffs_htn_all[seq_len(22), ]
coeffs_htn_literacy_rate_female <- coeffs_htn_all %>%
  mutate(p.value = coef(summary(htn_all_lmer))[, 5])
write.csv(coeffs_htn_literacy_rate_female, "DLHS_4_AHS_htn_literacy_rate_female.csv")




```

```{r lmer analysis obesity}



# Obesity indicator (bmigrt27.5_dbl): level-1 covariates plus one
# district-level covariate and its interactions with the wealth-quintile
# indicators wq_2..wq_5; random intercept for ex_d_name_ind. For every model:
#   - tidy() with Wald confidence intervals, keep the first 22 rows
#     (presumably the fixed-effect rows -- TODO confirm),
#   - set p.value from column 5 of the summary coefficient table (presumably
#     Pr(>|t|) as provided by lmerTest -- TODO confirm),
#   - write the table to CSV.
# bmi_all_lmer / coeffs_bmi_all are reused and end up holding the last model.

# median asset index (medianai_center)
bmi_all_lmer <- lmer(
  bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    medianai_center +
    medianai_center:wq_2 + medianai_center:wq_3 +
    medianai_center:wq_4 + medianai_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_bmi_all <- coeffs_bmi_all[seq_len(22), ]
coeffs_bmi_medi <- coeffs_bmi_all %>%
  mutate(p.value = coef(summary(bmi_all_lmer))[, 5])
write.csv(coeffs_bmi_medi, "DLHS_4_AHS_bmi_medianai_center.csv")

# PCI (PCI_district_center)
bmi_all_lmer <- lmer(
  bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:wq_2 + PCI_district_center:wq_3 +
    PCI_district_center:wq_4 + PCI_district_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_bmi_all <- coeffs_bmi_all[seq_len(22), ]
coeffs_bmi_pci <- coeffs_bmi_all %>%
  mutate(p.value = coef(summary(bmi_all_lmer))[, 5])
write.csv(coeffs_bmi_pci, "DLHS_4_AHS_bmi_pci.csv")

# educational attainment (ed_att_new_center)
bmi_all_lmer <- lmer(
  bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:wq_2 + ed_att_new_center:wq_3 +
    ed_att_new_center:wq_4 + ed_att_new_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_bmi_all <- coeffs_bmi_all[seq_len(22), ]
coeffs_bmi_ed_att_new <- coeffs_bmi_all %>%
  mutate(p.value = coef(summary(bmi_all_lmer))[, 5])
write.csv(coeffs_bmi_ed_att_new, "DLHS_4_AHS_bmi_ed_att_new.csv")

# urban proportion (urban_prop_center)
bmi_all_lmer <- lmer(
  bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:wq_2 + urban_prop_center:wq_3 +
    urban_prop_center:wq_4 + urban_prop_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_bmi_all <- coeffs_bmi_all[seq_len(22), ]
coeffs_bmi_urban_prop <- coeffs_bmi_all %>%
  mutate(p.value = coef(summary(bmi_all_lmer))[, 5])
write.csv(coeffs_bmi_urban_prop, "DLHS_4_AHS_bmi_urban_prop.csv")

# female literacy rate (literacy_rate_female_center)
bmi_all_lmer <- lmer(
  bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:wq_2 + literacy_rate_female_center:wq_3 +
    literacy_rate_female_center:wq_4 + literacy_rate_female_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_bmi_all <- coeffs_bmi_all[seq_len(22), ]
coeffs_bmi_literacy_rate_female <- coeffs_bmi_all %>%
  mutate(p.value = coef(summary(bmi_all_lmer))[, 5])
write.csv(coeffs_bmi_literacy_rate_female, "DLHS_4_AHS_bmi_literacy_rate_female.csv")




```



```{r lmer analysis currently smoking }




# Current smoking (csmoke_dbl): level-1 covariates plus one district-level
# covariate and its interactions with the wealth-quintile indicators
# wq_2..wq_5; random intercept for ex_d_name_ind. For every model:
#   - tidy() with Wald confidence intervals, keep the first 22 rows
#     (presumably the fixed-effect rows -- TODO confirm),
#   - set p.value from column 5 of the summary coefficient table (presumably
#     Pr(>|t|) as provided by lmerTest -- TODO confirm),
#   - write the table to CSV.
# csmoke_all_lmer / coeffs_csmoke_all are reused and end up holding the last
# model.

# median asset index (medianai_center)
csmoke_all_lmer <- lmer(
  csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    medianai_center +
    medianai_center:wq_2 + medianai_center:wq_3 +
    medianai_center:wq_4 + medianai_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_csmoke_all <- tidy(csmoke_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_csmoke_all <- coeffs_csmoke_all[seq_len(22), ]
coeffs_csmoke_medi <- coeffs_csmoke_all %>%
  mutate(p.value = coef(summary(csmoke_all_lmer))[, 5])
write.csv(coeffs_csmoke_medi, "DLHS_4_AHS_csmoke_medianai_center.csv")

# PCI (PCI_district_center)
csmoke_all_lmer <- lmer(
  csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:wq_2 + PCI_district_center:wq_3 +
    PCI_district_center:wq_4 + PCI_district_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_csmoke_all <- tidy(csmoke_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_csmoke_all <- coeffs_csmoke_all[seq_len(22), ]
coeffs_csmoke_pci <- coeffs_csmoke_all %>%
  mutate(p.value = coef(summary(csmoke_all_lmer))[, 5])
write.csv(coeffs_csmoke_pci, "DLHS_4_AHS_csmoke_pci.csv")

# educational attainment (ed_att_new_center)
csmoke_all_lmer <- lmer(
  csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:wq_2 + ed_att_new_center:wq_3 +
    ed_att_new_center:wq_4 + ed_att_new_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_csmoke_all <- tidy(csmoke_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_csmoke_all <- coeffs_csmoke_all[seq_len(22), ]
coeffs_csmoke_ed_att_new <- coeffs_csmoke_all %>%
  mutate(p.value = coef(summary(csmoke_all_lmer))[, 5])
write.csv(coeffs_csmoke_ed_att_new, "DLHS_4_AHS_csmoke_ed_att_new.csv")

# urban proportion (urban_prop_center)
csmoke_all_lmer <- lmer(
  csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:wq_2 + urban_prop_center:wq_3 +
    urban_prop_center:wq_4 + urban_prop_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_csmoke_all <- tidy(csmoke_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_csmoke_all <- coeffs_csmoke_all[seq_len(22), ]
coeffs_csmoke_urban_prop <- coeffs_csmoke_all %>%
  mutate(p.value = coef(summary(csmoke_all_lmer))[, 5])
write.csv(coeffs_csmoke_urban_prop, "DLHS_4_AHS_csmoke_urban_prop.csv")

# female literacy rate (literacy_rate_female_center)
csmoke_all_lmer <- lmer(
  csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:wq_2 + literacy_rate_female_center:wq_3 +
    literacy_rate_female_center:wq_4 + literacy_rate_female_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_csmoke_all <- tidy(csmoke_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_csmoke_all <- coeffs_csmoke_all[seq_len(22), ]
coeffs_csmoke_literacy_rate_female <- coeffs_csmoke_all %>%
  mutate(p.value = coef(summary(csmoke_all_lmer))[, 5])
write.csv(coeffs_csmoke_literacy_rate_female, "DLHS_4_AHS_csmoke_literacy_rate_female.csv")




```

###### Multilevel Model: education interaction



```{r level 1 education interaction}



# Level-1 models with education indicators: age-group indicators, education
# indicators (ed_3_o..ed_6_o), urban_center and sex_center, plus a random
# intercept for ex_d_name_ind. The same recipe is applied to each outcome:
#   1. fit the random-intercept model with lmer()
#   2. tidy() it with Wald confidence intervals
#   3. keep the first 17 rows (presumably the fixed-effect rows: intercept +
#      16 predictors -- TODO confirm)
#   4. set p.value from column 5 of the summary coefficient table (presumably
#      Pr(>|t|), which requires lmerTest to be loaded -- TODO confirm)
#   5. write the table to CSV

# diabetes
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_diab_all <- coeffs_diab_all[seq_len(17), ]
coeffs_diab_1 <- coeffs_diab_all %>%
  mutate(p.value = coef(summary(diab_all_lmer))[, 5])
write.csv(coeffs_diab_1, "ed_DLHS_4_AHS_diab_1.csv")

# hypertension
htn_all_lmer <- lmer(
  ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_htn_all <- coeffs_htn_all[seq_len(17), ]
coeffs_htn_1 <- coeffs_htn_all %>%
  mutate(p.value = coef(summary(htn_all_lmer))[, 5])
write.csv(coeffs_htn_1, "ed_DLHS_4_AHS_htn_1.csv")

# bmi
bmi_all_lmer <- lmer(
  bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_bmi_all <- coeffs_bmi_all[seq_len(17), ]
coeffs_bmi_1 <- coeffs_bmi_all %>%
  mutate(p.value = coef(summary(bmi_all_lmer))[, 5])
write.csv(coeffs_bmi_1, "ed_DLHS_4_AHS_bmi_1.csv")

# csmoke
csmoke_all_lmer <- lmer(
  csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_csmoke_all <- tidy(csmoke_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_csmoke_all <- coeffs_csmoke_all[seq_len(17), ]
coeffs_csmoke_1 <- coeffs_csmoke_all %>%
  mutate(p.value = coef(summary(csmoke_all_lmer))[, 5])
write.csv(coeffs_csmoke_1, "ed_DLHS_4_AHS_csmoke_1.csv")












```







```{r lmer analysis diabetes education interaction}


# Diabetes (ex_diab_narrow_ind_dbl): level-1 covariates plus one district-level
# covariate and its interactions with the education indicators
# ed_3_o..ed_6_o; random intercept for ex_d_name_ind. For every model:
#   - tidy() with Wald confidence intervals, keep the first 22 rows
#     (presumably the fixed-effect rows -- TODO confirm),
#   - set p.value from column 5 of the summary coefficient table (presumably
#     Pr(>|t|) as provided by lmerTest -- TODO confirm),
#   - write the table to CSV.
# diab_all_lmer / coeffs_diab_all are reused and end up holding the last model.

# median asset index (medianai_center)
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    medianai_center +
    medianai_center:ed_3_o + medianai_center:ed_4_o +
    medianai_center:ed_5_o + medianai_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_diab_all <- coeffs_diab_all[seq_len(22), ]
coeffs_diab_medi <- coeffs_diab_all %>%
  mutate(p.value = coef(summary(diab_all_lmer))[, 5])
write.csv(coeffs_diab_medi, "ed_DLHS_4_AHS_diab_medianai_center.csv")

# PCI (PCI_district_center)
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:ed_3_o + PCI_district_center:ed_4_o +
    PCI_district_center:ed_5_o + PCI_district_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_diab_all <- coeffs_diab_all[seq_len(22), ]
coeffs_diab_pci <- coeffs_diab_all %>%
  mutate(p.value = coef(summary(diab_all_lmer))[, 5])
write.csv(coeffs_diab_pci, "ed_DLHS_4_AHS_diab_pci.csv")

# educational attainment (ed_att_new_center)
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:ed_3_o + ed_att_new_center:ed_4_o +
    ed_att_new_center:ed_5_o + ed_att_new_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_diab_all <- coeffs_diab_all[seq_len(22), ]
coeffs_diab_ed_att_new <- coeffs_diab_all %>%
  mutate(p.value = coef(summary(diab_all_lmer))[, 5])
write.csv(coeffs_diab_ed_att_new, "ed_DLHS_4_AHS_diab_ed_att_new.csv")

# urban proportion (urban_prop_center)
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:ed_3_o + urban_prop_center:ed_4_o +
    urban_prop_center:ed_5_o + urban_prop_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_diab_all <- coeffs_diab_all[seq_len(22), ]
coeffs_diab_urban_prop <- coeffs_diab_all %>%
  mutate(p.value = coef(summary(diab_all_lmer))[, 5])
write.csv(coeffs_diab_urban_prop, "ed_DLHS_4_AHS_diab_urban_prop.csv")

# female literacy rate (literacy_rate_female_center)
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:ed_3_o + literacy_rate_female_center:ed_4_o +
    literacy_rate_female_center:ed_5_o + literacy_rate_female_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_diab_all <- tidy(diab_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_diab_all <- coeffs_diab_all[seq_len(22), ]
coeffs_diab_literacy_rate_female <- coeffs_diab_all %>%
  mutate(p.value = coef(summary(diab_all_lmer))[, 5])
write.csv(coeffs_diab_literacy_rate_female, "ed_DLHS_4_AHS_diab_literacy_rate_female.csv")




```





```{r lmer analysis hypertension education interaction}


# Hypertension (ex_htn_narrow_ind_dbl): level-1 covariates plus one
# district-level covariate and its interactions with the education indicators
# ed_3_o..ed_6_o; random intercept for ex_d_name_ind. For every model:
#   - tidy() with Wald confidence intervals, keep the first 22 rows
#     (presumably the fixed-effect rows -- TODO confirm),
#   - set p.value from column 5 of the summary coefficient table (presumably
#     Pr(>|t|) as provided by lmerTest -- TODO confirm),
#   - write the table to CSV.
# htn_all_lmer / coeffs_htn_all are reused and end up holding the last model.

# median asset index (medianai_center)
htn_all_lmer <- lmer(
  ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    medianai_center +
    medianai_center:ed_3_o + medianai_center:ed_4_o +
    medianai_center:ed_5_o + medianai_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_htn_all <- coeffs_htn_all[seq_len(22), ]
coeffs_htn_medi <- coeffs_htn_all %>%
  mutate(p.value = coef(summary(htn_all_lmer))[, 5])
write.csv(coeffs_htn_medi, "ed_DLHS_4_AHS_htn_medianai_center.csv")

# PCI (PCI_district_center)
htn_all_lmer <- lmer(
  ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:ed_3_o + PCI_district_center:ed_4_o +
    PCI_district_center:ed_5_o + PCI_district_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_htn_all <- coeffs_htn_all[seq_len(22), ]
coeffs_htn_pci <- coeffs_htn_all %>%
  mutate(p.value = coef(summary(htn_all_lmer))[, 5])
write.csv(coeffs_htn_pci, "ed_DLHS_4_AHS_htn_pci.csv")

# educational attainment (ed_att_new_center)
htn_all_lmer <- lmer(
  ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:ed_3_o + ed_att_new_center:ed_4_o +
    ed_att_new_center:ed_5_o + ed_att_new_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_htn_all <- coeffs_htn_all[seq_len(22), ]
coeffs_htn_ed_att_new <- coeffs_htn_all %>%
  mutate(p.value = coef(summary(htn_all_lmer))[, 5])
write.csv(coeffs_htn_ed_att_new, "ed_DLHS_4_AHS_htn_ed_att_new.csv")

# urban proportion (urban_prop_center)
htn_all_lmer <- lmer(
  ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:ed_3_o + urban_prop_center:ed_4_o +
    urban_prop_center:ed_5_o + urban_prop_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_htn_all <- coeffs_htn_all[seq_len(22), ]
coeffs_htn_urban_prop <- coeffs_htn_all %>%
  mutate(p.value = coef(summary(htn_all_lmer))[, 5])
write.csv(coeffs_htn_urban_prop, "ed_DLHS_4_AHS_htn_urban_prop.csv")

# female literacy rate (literacy_rate_female_center)
htn_all_lmer <- lmer(
  ex_htn_narrow_ind_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:ed_3_o + literacy_rate_female_center:ed_4_o +
    literacy_rate_female_center:ed_5_o + literacy_rate_female_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_htn_all <- tidy(htn_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_htn_all <- coeffs_htn_all[seq_len(22), ]
coeffs_htn_literacy_rate_female <- coeffs_htn_all %>%
  mutate(p.value = coef(summary(htn_all_lmer))[, 5])
write.csv(coeffs_htn_literacy_rate_female, "ed_DLHS_4_AHS_htn_literacy_rate_female.csv")



```

```{r lmer analysis obesity education interaction}


# Obesity indicator (bmigrt27.5_dbl): level-1 covariates plus one
# district-level covariate and its interactions with the education indicators
# ed_3_o..ed_6_o; random intercept for ex_d_name_ind. For every model:
#   - tidy() with Wald confidence intervals, keep the first 22 rows
#     (presumably the fixed-effect rows -- TODO confirm),
#   - set p.value from column 5 of the summary coefficient table (presumably
#     Pr(>|t|) as provided by lmerTest -- TODO confirm),
#   - write the table to CSV.
# bmi_all_lmer / coeffs_bmi_all are reused and end up holding the last model.

# median asset index (medianai_center)
bmi_all_lmer <- lmer(
  bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    medianai_center +
    medianai_center:ed_3_o + medianai_center:ed_4_o +
    medianai_center:ed_5_o + medianai_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_bmi_all <- coeffs_bmi_all[seq_len(22), ]
coeffs_bmi_medi <- coeffs_bmi_all %>%
  mutate(p.value = coef(summary(bmi_all_lmer))[, 5])
write.csv(coeffs_bmi_medi, "ed_DLHS_4_AHS_bmi_medianai_center.csv")

# PCI (PCI_district_center)
bmi_all_lmer <- lmer(
  bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:ed_3_o + PCI_district_center:ed_4_o +
    PCI_district_center:ed_5_o + PCI_district_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_bmi_all <- coeffs_bmi_all[seq_len(22), ]
coeffs_bmi_pci <- coeffs_bmi_all %>%
  mutate(p.value = coef(summary(bmi_all_lmer))[, 5])
write.csv(coeffs_bmi_pci, "ed_DLHS_4_AHS_bmi_pci.csv")

# educational attainment (ed_att_new_center)
bmi_all_lmer <- lmer(
  bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:ed_3_o + ed_att_new_center:ed_4_o +
    ed_att_new_center:ed_5_o + ed_att_new_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_bmi_all <- coeffs_bmi_all[seq_len(22), ]
coeffs_bmi_ed_att_new <- coeffs_bmi_all %>%
  mutate(p.value = coef(summary(bmi_all_lmer))[, 5])
write.csv(coeffs_bmi_ed_att_new, "ed_DLHS_4_AHS_bmi_ed_att_new.csv")

# urban proportion (urban_prop_center)
bmi_all_lmer <- lmer(
  bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:ed_3_o + urban_prop_center:ed_4_o +
    urban_prop_center:ed_5_o + urban_prop_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_bmi_all <- coeffs_bmi_all[seq_len(22), ]
coeffs_bmi_urban_prop <- coeffs_bmi_all %>%
  mutate(p.value = coef(summary(bmi_all_lmer))[, 5])
write.csv(coeffs_bmi_urban_prop, "ed_DLHS_4_AHS_bmi_urban_prop.csv")

# female literacy rate (literacy_rate_female_center)
bmi_all_lmer <- lmer(
  bmigrt27.5_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:ed_3_o + literacy_rate_female_center:ed_4_o +
    literacy_rate_female_center:ed_5_o + literacy_rate_female_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_bmi_all <- tidy(bmi_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_bmi_all <- coeffs_bmi_all[seq_len(22), ]
coeffs_bmi_literacy_rate_female <- coeffs_bmi_all %>%
  mutate(p.value = coef(summary(bmi_all_lmer))[, 5])
write.csv(coeffs_bmi_literacy_rate_female, "ed_DLHS_4_AHS_bmi_literacy_rate_female.csv")



```
























```{r lmer analysis currently smoking education interaction}




# Current smoking (csmoke_dbl): level-1 covariates plus one district-level
# covariate and its interactions with the education indicators
# ed_3_o..ed_6_o; random intercept for ex_d_name_ind. For every model:
#   - tidy() with Wald confidence intervals, keep the first 22 rows
#     (presumably the fixed-effect rows -- TODO confirm),
#   - set p.value from column 5 of the summary coefficient table (presumably
#     Pr(>|t|) as provided by lmerTest -- TODO confirm),
#   - write the table to CSV.
# csmoke_all_lmer / coeffs_csmoke_all are reused and end up holding the last
# model.

# median asset index (medianai_center)
csmoke_all_lmer <- lmer(
  csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    medianai_center +
    medianai_center:ed_3_o + medianai_center:ed_4_o +
    medianai_center:ed_5_o + medianai_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_csmoke_all <- tidy(csmoke_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_csmoke_all <- coeffs_csmoke_all[seq_len(22), ]
coeffs_csmoke_medi <- coeffs_csmoke_all %>%
  mutate(p.value = coef(summary(csmoke_all_lmer))[, 5])
write.csv(coeffs_csmoke_medi, "ed_DLHS_4_AHS_csmoke_medianai_center.csv")

# PCI (PCI_district_center)
csmoke_all_lmer <- lmer(
  csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:ed_3_o + PCI_district_center:ed_4_o +
    PCI_district_center:ed_5_o + PCI_district_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_csmoke_all <- tidy(csmoke_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_csmoke_all <- coeffs_csmoke_all[seq_len(22), ]
coeffs_csmoke_pci <- coeffs_csmoke_all %>%
  mutate(p.value = coef(summary(csmoke_all_lmer))[, 5])
write.csv(coeffs_csmoke_pci, "ed_DLHS_4_AHS_csmoke_pci.csv")

# educational attainment (ed_att_new_center)
csmoke_all_lmer <- lmer(
  csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:ed_3_o + ed_att_new_center:ed_4_o +
    ed_att_new_center:ed_5_o + ed_att_new_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_csmoke_all <- tidy(csmoke_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_csmoke_all <- coeffs_csmoke_all[seq_len(22), ]
coeffs_csmoke_ed_att_new <- coeffs_csmoke_all %>%
  mutate(p.value = coef(summary(csmoke_all_lmer))[, 5])
write.csv(coeffs_csmoke_ed_att_new, "ed_DLHS_4_AHS_csmoke_ed_att_new.csv")

# urban proportion (urban_prop_center)
# The formula previously contained a doubled `+ +` before the first
# interaction term; the stray unary plus is removed, the terms are unchanged.
csmoke_all_lmer <- lmer(
  csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:ed_3_o + urban_prop_center:ed_4_o +
    urban_prop_center:ed_5_o + urban_prop_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_csmoke_all <- tidy(csmoke_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_csmoke_all <- coeffs_csmoke_all[seq_len(22), ]
coeffs_csmoke_urban_prop <- coeffs_csmoke_all %>%
  mutate(p.value = coef(summary(csmoke_all_lmer))[, 5])
write.csv(coeffs_csmoke_urban_prop, "ed_DLHS_4_AHS_csmoke_urban_prop.csv")

# female literacy rate (literacy_rate_female_center)
csmoke_all_lmer <- lmer(
  csmoke_dbl ~
    agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:ed_3_o + literacy_rate_female_center:ed_4_o +
    literacy_rate_female_center:ed_5_o + literacy_rate_female_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)
coeffs_csmoke_all <- tidy(csmoke_all_lmer, conf.int = TRUE, conf.method = "Wald")
coeffs_csmoke_all <- coeffs_csmoke_all[seq_len(22), ]
coeffs_csmoke_literacy_rate_female <- coeffs_csmoke_all %>%
  mutate(p.value = coef(summary(csmoke_all_lmer))[, 5])
write.csv(coeffs_csmoke_literacy_rate_female, "ed_DLHS_4_AHS_csmoke_literacy_rate_female.csv")



```



###### Multilevel: wealth interaction (quintiles computed nationally)


```{r select variables for multilevel modeling }

# Build the modelling data set: keep only the columns of India_DLHS_AHS that
# the multilevel models and the derived indicators below rely on.
multilevel <- India_DLHS_AHS %>%
  dplyr::select(
    ex_d_name_ind,
    ex_diab_narrow_ind_dbl, ex_diab_narrow_ind_ahs_unfasted_dbl,
    ex_htn_narrow_ind_dbl, bmigrt27.5_dbl, csmoke_dbl,
    sex, age_grp, urban, ed_5,
    asset_index_combined, wealth_quintile_rurb,
    urban_prop, rural_prop, PCI_districts,
    literacy_rate_female, ed_att_new, medianai
  )

# Mixed-model tooling. lmerTest is attached after lme4, so its lmer() is the
# one found first on the search path.
library(broom.mixed) # to extract coefficients (for mixed models)
library(lme4)
library(lmerTest)

```

```{r convert categorical variables with >1 level into binary variables & perform Grand Mean Centering for continuous variables}


##################### individual-level variables
# age_group:
# One 0/1 indicator column per age_grp category. A missing age_grp yields NA
# in every indicator, because the comparison with NA is itself NA. This
# replaces the former `ifelse(is.na(x)==T, NA, ifelse(...))` construction,
# which relied on the reassignable shorthand `T`, and gives the same values.
multilevel <- mutate(
  multilevel,
  agr_15_19 = as.numeric(age_grp == "15-19"),
  agr_20_24 = as.numeric(age_grp == "20-24"),
  agr_25_29 = as.numeric(age_grp == "25-29"),
  agr_30_34 = as.numeric(age_grp == "30-34"),
  agr_35_39 = as.numeric(age_grp == "35-39"),
  agr_40_44 = as.numeric(age_grp == "40-44"),
  agr_45_49 = as.numeric(age_grp == "45-49"),
  agr_50_54 = as.numeric(age_grp == "50-54"),
  agr_55_59 = as.numeric(age_grp == "55-59"),
  agr_60_64 = as.numeric(age_grp == "60-64"),
  agr_65 = as.numeric(age_grp == ">65")
)


#household wealth quintile
multilevel<-mutate(multilevel,
                   wq_1_n=ifelse(is.na(wealth_quintile_rurb)==T,NA,ifelse(wealth_quintile_rurb==1,1,0)),
                   wq_2_n=ifelse(is.na(wealth_quintile_rurb)==T,NA,ifelse(wealth_quintile_rurb==2,1,0)),
                   wq_3_n=ifelse(is.na(wealth_quintile_rurb)==T,NA,ifelse(wealth_quintile_rurb==3,1,0)),
                   wq_4_n=ifelse(is.na(wealth_quintile_rurb)==T,NA,ifelse(wealth_quintile_rurb==4,1,0)),
                   wq_5_n=ifelse(is.na(wealth_quintile_rurb)==T,NA,ifelse(wealth_quintile_rurb==5,1,0))
                  
)


#education

multilevel<-mutate(multilevel,
                   ed_1_o=ifelse(is.na(ed_5)==T,NA,ifelse(ed_5==1,1,0)),
                   ed_3_o=ifelse(is.na(ed_5)==T,NA,ifelse(ed_5==3,1,0)),
                   ed_4_o=ifelse(is.na(ed_5)==T,NA,ifelse(ed_5==4,1,0)),
                   ed_5_o=ifelse(is.na(ed_5)==T,NA,ifelse(ed_5==5,1,0)),
                   ed_6_o=ifelse(is.na(ed_5)==T,NA,ifelse(ed_5==6,1,0))
)



#sex


multilevel<-mutate(multilevel,
                   sex_center=sex)


#urban


multilevel<-mutate(multilevel,
                   urban_center=urban)





###############district-level variables:

#district wealth; continuous->scaled
multilevel<-mutate(multilevel,
                   medianai_center=(medianai-(mean(medianai, na.rm=TRUE)))/(2* sd(medianai,na.rm=TRUE)))


##urban_prop#continuous->scaled
multilevel<-mutate(multilevel,
                   urban_prop_center=(urban_prop-(mean(urban_prop, na.rm=TRUE)))/(2* sd(urban_prop,na.rm=TRUE)))


###educational attainment#continous->scaled
multilevel<-mutate(multilevel,
                   ed_att_new_center=(ed_att_new-(mean(ed_att_new, na.rm=TRUE)))/(2* sd(ed_att_new,na.rm=TRUE)))



##PCI_districts#continuous->scaled

multilevel$PCI_districts<-as.numeric(multilevel$PCI_districts)
multilevel<-mutate(multilevel,
                   PCI_district_center=(PCI_districts-(mean(PCI_districts, na.rm=TRUE)))/(2* sd(PCI_districts,na.rm=TRUE)))




##female literacy rate#continuous->scaled


multilevel<-mutate(multilevel,
                   literacy_rate_female_center=(literacy_rate_female-(mean(literacy_rate_female, na.rm=TRUE)))/(2* sd(literacy_rate_female,na.rm=TRUE)))





```

```{r level 1}

# Random-intercept models with individual-level covariates only, one per
# outcome. Age group 15-19 and wealth quintile 1 have no indicator in the
# formula and therefore form the reference categories.
# For every model: fit, keep the fixed-effect rows of the tidied output (with
# Wald confidence intervals), set p.value from the lmerTest model summary, and
# write the table to CSV.

# Right-hand side shared by all four models.
nat_wq_level1_rhs <- paste(
  "agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +",
  "agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +",
  "wq_2_n + wq_3_n + wq_4_n + wq_5_n + urban_center + sex_center +",
  "(1 | ex_d_name_ind)"
)

# Builds `outcome ~ <shared right-hand side>`; the formula is given the
# caller's environment, as a formula written inline would have.
nat_wq_level1_formula <- function(outcome) {
  caller_env <- parent.frame()
  as.formula(paste(outcome, "~", nat_wq_level1_rhs), env = caller_env)
}

#diabetes
diab_all_lmer <- lmer(
  formula = nat_wq_level1_formula("ex_diab_narrow_ind_dbl"),
  data = multilevel
)
# Rows are selected by `effect` rather than by a fixed row range (was 1:17).
coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# p-value column addressed by name (was column 5 of the coefficient table).
coeffs_diab_1 <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)
# NOTE(review): "nat_wq" appears twice in this file name (and in the
# hypertension one below) but not in the bmi/csmoke names -- kept as is.
write.csv(coeffs_diab_1, "DLHS_4_AHS_nat_wq_nat_wq_diab_1.csv")


#hypertension
htn_all_lmer <- lmer(
  formula = nat_wq_level1_formula("ex_htn_narrow_ind_dbl"),
  data = multilevel
)
coeffs_htn_all <- htn_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_htn_1 <- mutate(
  coeffs_htn_all,
  p.value = coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_htn_1, "DLHS_4_AHS_nat_wq_nat_wq_htn_1.csv")


#bmi
bmi_all_lmer <- lmer(
  formula = nat_wq_level1_formula("bmigrt27.5_dbl"),
  data = multilevel
)
coeffs_bmi_all <- bmi_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_bmi_1 <- mutate(
  coeffs_bmi_all,
  p.value = coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_bmi_1, "DLHS_4_AHS_nat_wq_bmi_1.csv")


#csmoke
csmoke_all_lmer <- lmer(
  formula = nat_wq_level1_formula("csmoke_dbl"),
  data = multilevel
)
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_1 <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_csmoke_1, "DLHS_4_AHS_nat_wq_csmoke_1.csv")

```










```{r lmer analysis diabetes}

# Diabetes: cross-level interaction models. Each model adds one scaled
# district-level variable and its interaction with the wealth-quintile
# indicators to the individual-level covariates, with a random intercept per
# district (ex_d_name_ind).
# For every model: fit, keep the fixed-effect rows of the tidied output (with
# Wald confidence intervals), set p.value from the lmerTest model summary, and
# write the table to CSV.

# Individual-level terms shared by all models in this chunk.
nat_wq_dummies <- c("wq_2_n", "wq_3_n", "wq_4_n", "wq_5_n")
nat_wq_level1_terms <- c(
  "agr_20_24", "agr_25_29", "agr_30_34", "agr_35_39", "agr_40_44",
  "agr_45_49", "agr_50_54", "agr_55_59", "agr_60_64", "agr_65",
  nat_wq_dummies, "urban_center", "sex_center"
)

# Builds
#   outcome ~ <individual terms> + district_var + district_var:wq_k_n (k = 2..5)
#             + (1 | ex_d_name_ind)
# from character names, so the long formula is written once instead of being
# copied per model. The formula is given the caller's environment.
nat_wq_cross_level_formula <- function(outcome, district_var) {
  caller_env <- parent.frame()
  rhs <- c(
    nat_wq_level1_terms,
    district_var,
    paste0(district_var, ":", nat_wq_dummies),
    "(1 | ex_d_name_ind)"
  )
  as.formula(paste(outcome, "~", paste(rhs, collapse = " + ")), env = caller_env)
}

#median center
diab_all_lmer <- lmer(
  formula = nat_wq_cross_level_formula("ex_diab_narrow_ind_dbl", "medianai_center"),
  data = multilevel
)
# Rows are selected by `effect` rather than by a fixed row range (was 1:22).
coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# p-value column addressed by name (was column 5 of the coefficient table).
coeffs_diab_medi <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_diab_medi, "DLHS_4_AHS_nat_wq_diab_medianai_center.csv")


#PCI
diab_all_lmer <- lmer(
  formula = nat_wq_cross_level_formula("ex_diab_narrow_ind_dbl", "PCI_district_center"),
  data = multilevel
)
coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_diab_pci <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_diab_pci, "DLHS_4_AHS_nat_wq_diab_pci.csv")


##educational attainment
diab_all_lmer <- lmer(
  formula = nat_wq_cross_level_formula("ex_diab_narrow_ind_dbl", "ed_att_new_center"),
  data = multilevel
)
coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_diab_ed_att_new <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_diab_ed_att_new, "DLHS_4_AHS_nat_wq_diab_ed_att_new.csv")


##urban prop
diab_all_lmer <- lmer(
  formula = nat_wq_cross_level_formula("ex_diab_narrow_ind_dbl", "urban_prop_center"),
  data = multilevel
)
coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_diab_urban_prop <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_diab_urban_prop, "DLHS_4_AHS_nat_wq_diab_urban_prop.csv")


#Female literacy rate
diab_all_lmer <- lmer(
  formula = nat_wq_cross_level_formula("ex_diab_narrow_ind_dbl", "literacy_rate_female_center"),
  data = multilevel
)
coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_diab_literacy_rate_female <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_diab_literacy_rate_female, "DLHS_4_AHS_nat_wq_diab_literacy_rate_female.csv")

```





```{r lmer analysis hypertension}

# Hypertension: cross-level interaction models. Each model adds one scaled
# district-level variable and its interaction with the wealth-quintile
# indicators to the individual-level covariates, with a random intercept per
# district (ex_d_name_ind).
# For every model: fit, keep the fixed-effect rows of the tidied output (with
# Wald confidence intervals), set p.value from the lmerTest model summary, and
# write the table to CSV.

# Individual-level terms shared by all models in this chunk.
nat_wq_dummies <- c("wq_2_n", "wq_3_n", "wq_4_n", "wq_5_n")
nat_wq_level1_terms <- c(
  "agr_20_24", "agr_25_29", "agr_30_34", "agr_35_39", "agr_40_44",
  "agr_45_49", "agr_50_54", "agr_55_59", "agr_60_64", "agr_65",
  nat_wq_dummies, "urban_center", "sex_center"
)

# Builds
#   outcome ~ <individual terms> + district_var + district_var:wq_k_n (k = 2..5)
#             + (1 | ex_d_name_ind)
# from character names, so the long formula is written once instead of being
# copied per model. The formula is given the caller's environment.
nat_wq_cross_level_formula <- function(outcome, district_var) {
  caller_env <- parent.frame()
  rhs <- c(
    nat_wq_level1_terms,
    district_var,
    paste0(district_var, ":", nat_wq_dummies),
    "(1 | ex_d_name_ind)"
  )
  as.formula(paste(outcome, "~", paste(rhs, collapse = " + ")), env = caller_env)
}

#median center
htn_all_lmer <- lmer(
  formula = nat_wq_cross_level_formula("ex_htn_narrow_ind_dbl", "medianai_center"),
  data = multilevel
)
# Rows are selected by `effect` rather than by a fixed row range (was 1:22).
coeffs_htn_all <- htn_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# p-value column addressed by name (was column 5 of the coefficient table).
coeffs_htn_medi <- mutate(
  coeffs_htn_all,
  p.value = coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_htn_medi, "DLHS_4_AHS_nat_wq_htn_medianai_center.csv")


#PCI
htn_all_lmer <- lmer(
  formula = nat_wq_cross_level_formula("ex_htn_narrow_ind_dbl", "PCI_district_center"),
  data = multilevel
)
coeffs_htn_all <- htn_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_htn_pci <- mutate(
  coeffs_htn_all,
  p.value = coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_htn_pci, "DLHS_4_AHS_nat_wq_htn_pci.csv")


##educational attainment
htn_all_lmer <- lmer(
  formula = nat_wq_cross_level_formula("ex_htn_narrow_ind_dbl", "ed_att_new_center"),
  data = multilevel
)
coeffs_htn_all <- htn_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_htn_ed_att_new <- mutate(
  coeffs_htn_all,
  p.value = coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_htn_ed_att_new, "DLHS_4_AHS_nat_wq_htn_ed_att_new.csv")


##urban prop
htn_all_lmer <- lmer(
  formula = nat_wq_cross_level_formula("ex_htn_narrow_ind_dbl", "urban_prop_center"),
  data = multilevel
)
coeffs_htn_all <- htn_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_htn_urban_prop <- mutate(
  coeffs_htn_all,
  p.value = coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_htn_urban_prop, "DLHS_4_AHS_nat_wq_htn_urban_prop.csv")


#Female literacy rate
htn_all_lmer <- lmer(
  formula = nat_wq_cross_level_formula("ex_htn_narrow_ind_dbl", "literacy_rate_female_center"),
  data = multilevel
)
coeffs_htn_all <- htn_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_htn_literacy_rate_female <- mutate(
  coeffs_htn_all,
  p.value = coef(summary(htn_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_htn_literacy_rate_female, "DLHS_4_AHS_nat_wq_htn_literacy_rate_female.csv")

```

```{r lmer analysis obesity}

# Obesity (bmigrt27.5_dbl): cross-level interaction models. Each model adds one
# scaled district-level variable and its interaction with the wealth-quintile
# indicators to the individual-level covariates, with a random intercept per
# district (ex_d_name_ind).
# For every model: fit, keep the fixed-effect rows of the tidied output (with
# Wald confidence intervals), set p.value from the lmerTest model summary, and
# write the table to CSV.

# Individual-level terms shared by all models in this chunk.
nat_wq_dummies <- c("wq_2_n", "wq_3_n", "wq_4_n", "wq_5_n")
nat_wq_level1_terms <- c(
  "agr_20_24", "agr_25_29", "agr_30_34", "agr_35_39", "agr_40_44",
  "agr_45_49", "agr_50_54", "agr_55_59", "agr_60_64", "agr_65",
  nat_wq_dummies, "urban_center", "sex_center"
)

# Builds
#   outcome ~ <individual terms> + district_var + district_var:wq_k_n (k = 2..5)
#             + (1 | ex_d_name_ind)
# from character names, so the long formula is written once instead of being
# copied per model. The formula is given the caller's environment.
nat_wq_cross_level_formula <- function(outcome, district_var) {
  caller_env <- parent.frame()
  rhs <- c(
    nat_wq_level1_terms,
    district_var,
    paste0(district_var, ":", nat_wq_dummies),
    "(1 | ex_d_name_ind)"
  )
  as.formula(paste(outcome, "~", paste(rhs, collapse = " + ")), env = caller_env)
}

#median center
bmi_all_lmer <- lmer(
  formula = nat_wq_cross_level_formula("bmigrt27.5_dbl", "medianai_center"),
  data = multilevel
)
# Rows are selected by `effect` rather than by a fixed row range (was 1:22).
coeffs_bmi_all <- bmi_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# p-value column addressed by name (was column 5 of the coefficient table).
coeffs_bmi_medi <- mutate(
  coeffs_bmi_all,
  p.value = coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_bmi_medi, "DLHS_4_AHS_nat_wq_bmi_medianai_center.csv")


#PCI
bmi_all_lmer <- lmer(
  formula = nat_wq_cross_level_formula("bmigrt27.5_dbl", "PCI_district_center"),
  data = multilevel
)
coeffs_bmi_all <- bmi_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_bmi_pci <- mutate(
  coeffs_bmi_all,
  p.value = coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_bmi_pci, "DLHS_4_AHS_nat_wq_bmi_pci.csv")


##educational attainment
bmi_all_lmer <- lmer(
  formula = nat_wq_cross_level_formula("bmigrt27.5_dbl", "ed_att_new_center"),
  data = multilevel
)
coeffs_bmi_all <- bmi_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_bmi_ed_att_new <- mutate(
  coeffs_bmi_all,
  p.value = coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_bmi_ed_att_new, "DLHS_4_AHS_nat_wq_bmi_ed_att_new.csv")


##urban prop
bmi_all_lmer <- lmer(
  formula = nat_wq_cross_level_formula("bmigrt27.5_dbl", "urban_prop_center"),
  data = multilevel
)
coeffs_bmi_all <- bmi_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_bmi_urban_prop <- mutate(
  coeffs_bmi_all,
  p.value = coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_bmi_urban_prop, "DLHS_4_AHS_nat_wq_bmi_urban_prop.csv")


#Female literacy rate
bmi_all_lmer <- lmer(
  formula = nat_wq_cross_level_formula("bmigrt27.5_dbl", "literacy_rate_female_center"),
  data = multilevel
)
coeffs_bmi_all <- bmi_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_bmi_literacy_rate_female <- mutate(
  coeffs_bmi_all,
  p.value = coef(summary(bmi_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_bmi_literacy_rate_female, "DLHS_4_AHS_nat_wq_bmi_literacy_rate_female.csv")

```



```{r lmer analysis currently smoking }

# Current smoking: cross-level interaction models. Each model adds one scaled
# district-level variable and its interaction with the wealth-quintile
# indicators to the individual-level covariates, with a random intercept per
# district (ex_d_name_ind).
# For every model: fit, keep the fixed-effect rows of the tidied output (with
# Wald confidence intervals), set p.value from the lmerTest model summary, and
# write the table to CSV.

# Individual-level terms shared by all models in this chunk.
nat_wq_dummies <- c("wq_2_n", "wq_3_n", "wq_4_n", "wq_5_n")
nat_wq_level1_terms <- c(
  "agr_20_24", "agr_25_29", "agr_30_34", "agr_35_39", "agr_40_44",
  "agr_45_49", "agr_50_54", "agr_55_59", "agr_60_64", "agr_65",
  nat_wq_dummies, "urban_center", "sex_center"
)

# Builds
#   outcome ~ <individual terms> + district_var + district_var:wq_k_n (k = 2..5)
#             + (1 | ex_d_name_ind)
# from character names, so the long formula is written once instead of being
# copied per model. The formula is given the caller's environment.
nat_wq_cross_level_formula <- function(outcome, district_var) {
  caller_env <- parent.frame()
  rhs <- c(
    nat_wq_level1_terms,
    district_var,
    paste0(district_var, ":", nat_wq_dummies),
    "(1 | ex_d_name_ind)"
  )
  as.formula(paste(outcome, "~", paste(rhs, collapse = " + ")), env = caller_env)
}

#median center
csmoke_all_lmer <- lmer(
  formula = nat_wq_cross_level_formula("csmoke_dbl", "medianai_center"),
  data = multilevel
)
# Rows are selected by `effect` rather than by a fixed row range (was 1:22).
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# p-value column addressed by name (was column 5 of the coefficient table).
coeffs_csmoke_medi <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_csmoke_medi, "DLHS_4_AHS_nat_wq_csmoke_medianai_center.csv")


#PCI
csmoke_all_lmer <- lmer(
  formula = nat_wq_cross_level_formula("csmoke_dbl", "PCI_district_center"),
  data = multilevel
)
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_pci <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_csmoke_pci, "DLHS_4_AHS_nat_wq_csmoke_pci.csv")


##educational attainment
csmoke_all_lmer <- lmer(
  formula = nat_wq_cross_level_formula("csmoke_dbl", "ed_att_new_center"),
  data = multilevel
)
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_ed_att_new <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_csmoke_ed_att_new, "DLHS_4_AHS_nat_wq_csmoke_ed_att_new.csv")


##urban prop
csmoke_all_lmer <- lmer(
  formula = nat_wq_cross_level_formula("csmoke_dbl", "urban_prop_center"),
  data = multilevel
)
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_urban_prop <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_csmoke_urban_prop, "DLHS_4_AHS_nat_wq_csmoke_urban_prop.csv")


#Female literacy rate
csmoke_all_lmer <- lmer(
  formula = nat_wq_cross_level_formula("csmoke_dbl", "literacy_rate_female_center"),
  data = multilevel
)
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_literacy_rate_female <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_csmoke_literacy_rate_female, "DLHS_4_AHS_nat_wq_csmoke_literacy_rate_female.csv")

```



# Multilevel Model: current smoking stratified by gender

##### Female

```{r select variables for multilevel modeling - female}

# Rebuild `multilevel` from the full analysis dataset, keeping only the
# district identifier, the outcomes, the individual-level covariates and the
# district-level covariates, then restrict it to the rows with sex == 1.
# Per the section heading this is the female subsample; the coding of `sex`
# is not visible here -- confirm.
# (The chunk label is unique on purpose: knitr refuses to knit a document in
# which two chunks share the same label.)
multilevel <- India_DLHS_AHS %>%
  dplyr::select(
    ex_d_name_ind,
    ex_diab_narrow_ind_dbl,
    ex_diab_narrow_ind_ahs_unfasted_dbl,
    ex_htn_narrow_ind_dbl,
    bmigrt27.5_dbl,
    csmoke_dbl,
    sex,
    age_grp,
    urban,
    ed_5,
    hh_wealth_quintile_district,
    hh_wealth_quintile_district_c,
    asset_index_combined,
    wealth_quintile_rurb,
    urban_prop,
    rural_prop,
    PCI_districts,
    literacy_rate_female,
    ed_att_new,
    medianai
  ) %>%
  dplyr::filter(sex == 1)

# lmerTest is attached after lme4 so that its lmer() is the one called below;
# the later chunks read p-values from the lmerTest model summary.
library(broom.mixed) # tidy() methods for mixed models
library(lme4)
library(lmerTest)

```

```{r dummy-code categorical variables and standardize continuous variables - female}

# Adds the model covariates to `multilevel` (already restricted to sex == 1 by
# the preceding chunk):
#   * 0/1 indicator columns for age group, district-specific wealth quintile
#     and education (NA wherever the source variable is NA),
#   * plain copies of sex and urban under the *_center names used in the
#     model formulas (they are NOT centered),
#   * district-level continuous variables centered on their mean and divided
#     by two standard deviations.

# Centers x on its mean and divides by 2 SD; missing values are ignored when
# computing the mean and the SD. Both are taken over all rows of the current
# (sex-restricted) `multilevel`, i.e. over individual records rather than over
# one row per district.
scale_2sd <- function(x) {
  (x - mean(x, na.rm = TRUE)) / (2 * sd(x, na.rm = TRUE))
}

multilevel <- multilevel %>%
  mutate(
    ##################### individual-level variables
    # age group (a comparison with NA yields NA, so missing ages stay missing)
    agr_15_19 = as.numeric(age_grp == "15-19"),
    agr_20_24 = as.numeric(age_grp == "20-24"),
    agr_25_29 = as.numeric(age_grp == "25-29"),
    agr_30_34 = as.numeric(age_grp == "30-34"),
    agr_35_39 = as.numeric(age_grp == "35-39"),
    agr_40_44 = as.numeric(age_grp == "40-44"),
    agr_45_49 = as.numeric(age_grp == "45-49"),
    agr_50_54 = as.numeric(age_grp == "50-54"),
    agr_55_59 = as.numeric(age_grp == "55-59"),
    agr_60_64 = as.numeric(age_grp == "60-64"),
    agr_65 = as.numeric(age_grp == ">65"),

    # district household wealth quintile
    wq_1 = as.numeric(hh_wealth_quintile_district == 1),
    wq_2 = as.numeric(hh_wealth_quintile_district == 2),
    wq_3 = as.numeric(hh_wealth_quintile_district == 3),
    wq_4 = as.numeric(hh_wealth_quintile_district == 4),
    wq_5 = as.numeric(hh_wealth_quintile_district == 5),

    # education
    # NOTE(review): no indicator is created for ed_5 == 2 -- presumably that
    # code does not occur; confirm against the ed_5 coding.
    ed_1_o = as.numeric(ed_5 == 1),
    ed_3_o = as.numeric(ed_5 == 3),
    ed_4_o = as.numeric(ed_5 == 4),
    ed_5_o = as.numeric(ed_5 == 5),
    ed_6_o = as.numeric(ed_5 == 6),

    # sex and urban: unmodified copies (sex is constant after the filter)
    sex_center = sex,
    urban_center = urban,

    ##################### district-level variables (continuous -> scaled)
    medianai_center = scale_2sd(medianai),
    urban_prop_center = scale_2sd(urban_prop),
    ed_att_new_center = scale_2sd(ed_att_new),
    # NOTE(review): if PCI_districts is stored as a factor, as.numeric()
    # returns the level codes rather than the values -- confirm its type.
    PCI_districts = as.numeric(PCI_districts),
    PCI_district_center = scale_2sd(PCI_districts),
    literacy_rate_female_center = scale_2sd(literacy_rate_female)
  )

```




```{r lmer analysis wealth (quintiles computed for each district) - female}

# Current smoking in the sex-stratified sample (the preceding chunks restrict
# `multilevel` to sex == 1): a level-1 model with individual covariates only,
# followed by cross-level interaction models that each add one scaled
# district-level variable and its interaction with the district wealth-quintile
# indicators. All models have a random intercept per district (ex_d_name_ind).
# For every model: fit, keep the fixed-effect rows of the tidied output (with
# Wald confidence intervals), set p.value from the lmerTest model summary, and
# write the table to CSV.

# Individual-level terms shared by all models in this chunk (no sex term,
# since the sample is restricted to one sex).
district_wq_dummies <- c("wq_2", "wq_3", "wq_4", "wq_5")
stratified_wq_terms <- c(
  "agr_20_24", "agr_25_29", "agr_30_34", "agr_35_39", "agr_40_44",
  "agr_45_49", "agr_50_54", "agr_55_59", "agr_60_64", "agr_65",
  district_wq_dummies, "urban_center"
)

# Builds
#   csmoke_dbl ~ <individual terms> [+ district_var + district_var:wq_k (k = 2..5)]
#                + (1 | ex_d_name_ind)
# The bracketed part is added only when `district_var` is given. The formula
# is given the caller's environment.
stratified_wq_formula <- function(district_var = NULL) {
  caller_env <- parent.frame()
  rhs <- stratified_wq_terms
  if (!is.null(district_var)) {
    rhs <- c(rhs, district_var, paste0(district_var, ":", district_wq_dummies))
  }
  rhs <- c(rhs, "(1 | ex_d_name_ind)")
  as.formula(paste("csmoke_dbl ~", paste(rhs, collapse = " + ")), env = caller_env)
}

#csmoke level1
csmoke_all_lmer <- lmer(formula = stratified_wq_formula(), data = multilevel)
# Rows are selected by `effect` rather than by a fixed row range (was 1:16
# here and 1:21 for the interaction models below).
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# p-value column addressed by name (was column 5 of the coefficient table).
coeffs_csmoke_1 <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_csmoke_1, "female_DLHS_4_AHS_csmoke_1.csv")


#median center
csmoke_all_lmer <- lmer(
  formula = stratified_wq_formula("medianai_center"),
  data = multilevel
)
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_medi <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_csmoke_medi, "female_DLHS_4_AHS_csmoke_medianai_center.csv")


#PCI
csmoke_all_lmer <- lmer(
  formula = stratified_wq_formula("PCI_district_center"),
  data = multilevel
)
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_pci <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_csmoke_pci, "female_DLHS_4_AHS_csmoke_pci.csv")


##educational attainment
csmoke_all_lmer <- lmer(
  formula = stratified_wq_formula("ed_att_new_center"),
  data = multilevel
)
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_ed_att_new <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_csmoke_ed_att_new, "female_DLHS_4_AHS_csmoke_ed_att_new.csv")


##urban prop
csmoke_all_lmer <- lmer(
  formula = stratified_wq_formula("urban_prop_center"),
  data = multilevel
)
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_urban_prop <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_csmoke_urban_prop, "female_DLHS_4_AHS_csmoke_urban_prop.csv")


#Female literacy rate
csmoke_all_lmer <- lmer(
  formula = stratified_wq_formula("literacy_rate_female_center"),
  data = multilevel
)
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_literacy_rate_female <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_csmoke_literacy_rate_female, "female_DLHS_4_AHS_csmoke_literacy_rate_female.csv")

```
```{r lmer analysis education}

# Current smoking in the sex-stratified sample (the preceding chunks restrict
# `multilevel` to sex == 1), with education instead of wealth as the
# individual-level socioeconomic measure: a level-1 model with individual
# covariates only, followed by cross-level interaction models that each add one
# scaled district-level variable and its interaction with the education
# indicators. All models have a random intercept per district (ex_d_name_ind).
# For every model: fit, keep the fixed-effect rows of the tidied output (with
# Wald confidence intervals), set p.value from the lmerTest model summary, and
# write the table to CSV.

# Individual-level terms shared by all models in this chunk (no sex term,
# since the sample is restricted to one sex).
education_dummies <- c("ed_3_o", "ed_4_o", "ed_5_o", "ed_6_o")
stratified_ed_terms <- c(
  "agr_20_24", "agr_25_29", "agr_30_34", "agr_35_39", "agr_40_44",
  "agr_45_49", "agr_50_54", "agr_55_59", "agr_60_64", "agr_65",
  education_dummies, "urban_center"
)

# Builds
#   csmoke_dbl ~ <individual terms> [+ district_var + district_var:ed_k_o (k = 3..6)]
#                + (1 | ex_d_name_ind)
# The bracketed part is added only when `district_var` is given. The formula
# is given the caller's environment.
stratified_ed_formula <- function(district_var = NULL) {
  caller_env <- parent.frame()
  rhs <- stratified_ed_terms
  if (!is.null(district_var)) {
    rhs <- c(rhs, district_var, paste0(district_var, ":", education_dummies))
  }
  rhs <- c(rhs, "(1 | ex_d_name_ind)")
  as.formula(paste("csmoke_dbl ~", paste(rhs, collapse = " + ")), env = caller_env)
}

#level1
csmoke_all_lmer <- lmer(formula = stratified_ed_formula(), data = multilevel)
# Rows are selected by `effect` rather than by a fixed row range (was 1:16
# here and 1:21 for the interaction models below).
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# p-value column addressed by name (was column 5 of the coefficient table).
coeffs_csmoke_1 <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_csmoke_1, "female_ed_DLHS_4_AHS_csmoke_1.csv")


#median center
csmoke_all_lmer <- lmer(
  formula = stratified_ed_formula("medianai_center"),
  data = multilevel
)
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_medi <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_csmoke_medi, "female_ed_DLHS_4_AHS_csmoke_medianai_center.csv")


#PCI
csmoke_all_lmer <- lmer(
  formula = stratified_ed_formula("PCI_district_center"),
  data = multilevel
)
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_pci <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_csmoke_pci, "female_ed_DLHS_4_AHS_csmoke_pci.csv")


##educational attainment
csmoke_all_lmer <- lmer(
  formula = stratified_ed_formula("ed_att_new_center"),
  data = multilevel
)
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_ed_att_new <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_csmoke_ed_att_new, "female_ed_DLHS_4_AHS_csmoke_ed_att_new.csv")


##urban prop
# (The original formula contained a stray "+ +" before the first interaction
# term; it had no effect on the model and is not reproduced.)
csmoke_all_lmer <- lmer(
  formula = stratified_ed_formula("urban_prop_center"),
  data = multilevel
)
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_urban_prop <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_csmoke_urban_prop, "female_ed_DLHS_4_AHS_csmoke_urban_prop.csv")


#Female literacy rate
csmoke_all_lmer <- lmer(
  formula = stratified_ed_formula("literacy_rate_female_center"),
  data = multilevel
)
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
coeffs_csmoke_literacy_rate_female <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)
write.csv(coeffs_csmoke_literacy_rate_female, "female_ed_DLHS_4_AHS_csmoke_literacy_rate_female.csv")

```
##### Male
```{r select variables for multilevel modeling - male}

# Rebuild `multilevel` from the full analysis dataset, keeping only the
# district identifier, the outcomes, the individual-level covariates and the
# district-level covariates, then restrict it to the rows with sex == 0.
# Per the section heading this is the male subsample; the coding of `sex`
# is not visible here -- confirm.
# (The chunk label is unique on purpose: knitr refuses to knit a document in
# which two chunks share the same label.)
multilevel <- India_DLHS_AHS %>%
  dplyr::select(
    ex_d_name_ind,
    ex_diab_narrow_ind_dbl,
    ex_diab_narrow_ind_ahs_unfasted_dbl,
    ex_htn_narrow_ind_dbl,
    bmigrt27.5_dbl,
    csmoke_dbl,
    sex,
    age_grp,
    urban,
    ed_5,
    hh_wealth_quintile_district,
    hh_wealth_quintile_district_c,
    asset_index_combined,
    wealth_quintile_rurb,
    urban_prop,
    rural_prop,
    PCI_districts,
    literacy_rate_female,
    ed_att_new,
    medianai
  ) %>%
  dplyr::filter(sex == 0)

# lmerTest is attached after lme4 so that its lmer() is the one called below;
# the later chunks read p-values from the lmerTest model summary.
library(broom.mixed) # tidy() methods for mixed models
library(lme4)
library(lmerTest)

```

```{r convert categorical varibales with >1 level in binary variables& perform Grand Mean Centering for continuous varibales}


#####################individual level varibles
#age_group:


multilevel<-mutate(multilevel,
              agr_15_19=ifelse(is.na(age_grp)==T,NA,ifelse(age_grp=="15-19",1,0)),
              agr_20_24=ifelse(is.na(age_grp)==T,NA,ifelse(age_grp=="20-24",1,0)),
              agr_25_29=ifelse(is.na(age_grp)==T,NA,ifelse(age_grp=="25-29",1,0)),
              agr_30_34=ifelse(is.na(age_grp)==T,NA,ifelse(age_grp=="30-34",1,0)),
              agr_35_39=ifelse(is.na(age_grp)==T,NA,ifelse(age_grp=="35-39",1,0)),
              agr_40_44=ifelse(is.na(age_grp)==T,NA,ifelse(age_grp=="40-44",1,0)),
              agr_45_49=ifelse(is.na(age_grp)==T,NA,ifelse(age_grp=="45-49",1,0)),
              agr_50_54=ifelse(is.na(age_grp)==T,NA,ifelse(age_grp=="50-54",1,0)),
              agr_55_59=ifelse(is.na(age_grp)==T,NA,ifelse(age_grp=="55-59",1,0)),
              agr_60_64=ifelse(is.na(age_grp)==T,NA,ifelse(age_grp=="60-64",1,0)),
              agr_65=ifelse(is.na(age_grp)==T,NA,ifelse(age_grp==">65",1,0))
)


#district household_wealth_quintile:


multilevel<-mutate(multilevel,
                   wq_1=ifelse(is.na(hh_wealth_quintile_district)==T,NA,ifelse(hh_wealth_quintile_district==1,1,0)),
                   wq_2=ifelse(is.na(hh_wealth_quintile_district)==T,NA,ifelse(hh_wealth_quintile_district==2,1,0)),
                   wq_3=ifelse(is.na(hh_wealth_quintile_district)==T,NA,ifelse(hh_wealth_quintile_district==3,1,0)),
                   wq_4=ifelse(is.na(hh_wealth_quintile_district)==T,NA,ifelse(hh_wealth_quintile_district==4,1,0)),
                   wq_5=ifelse(is.na(hh_wealth_quintile_district)==T,NA,ifelse(hh_wealth_quintile_district==5,1,0))
                  
)




# education
# 0/1 indicators for ed_5 values 1, 3, 4, 5 and 6; NA where ed_5 is missing.
# The comparison propagates NA by itself, so the is.na() guard and the
# reassignable `T` are not needed.
# NOTE(review): no indicator is created for ed_5 == 2 -- confirm that this
# code does not occur in ed_5.
multilevel <- mutate(
  multilevel,
  ed_1_o = as.numeric(ed_5 == 1),
  ed_3_o = as.numeric(ed_5 == 3),
  ed_4_o = as.numeric(ed_5 == 4),
  ed_5_o = as.numeric(ed_5 == 5),
  ed_6_o = as.numeric(ed_5 == 6)
)



# sex and urban
# Despite the `_center` suffix, both columns are unmodified copies of the
# source variables; no centering or scaling is applied to them.
multilevel <- multilevel %>%
  mutate(
    sex_center = sex,
    urban_center = urban
  )





############### district-level variables
# Each continuous covariate is centered on its mean and divided by twice its
# standard deviation. Mean and SD are taken over all rows currently in
# `multilevel`, ignoring missing values.
multilevel <- multilevel %>%
  mutate(
    # district wealth
    medianai_center =
      (medianai - mean(medianai, na.rm = TRUE)) /
        (2 * sd(medianai, na.rm = TRUE)),
    # urban_prop
    urban_prop_center =
      (urban_prop - mean(urban_prop, na.rm = TRUE)) /
        (2 * sd(urban_prop, na.rm = TRUE)),
    # educational attainment
    ed_att_new_center =
      (ed_att_new - mean(ed_att_new, na.rm = TRUE)) /
        (2 * sd(ed_att_new, na.rm = TRUE))
  )



## PCI_districts: continuous -> scaled
# Coerce PCI_districts to numeric before scaling. A factor has to go through
# as.character() first, because as.numeric() on a factor returns the internal
# level codes rather than the recorded values.
multilevel$PCI_districts <- if (is.factor(multilevel$PCI_districts)) {
  as.numeric(as.character(multilevel$PCI_districts))
} else {
  as.numeric(multilevel$PCI_districts)
}
# Center on the mean and divide by two standard deviations (NA ignored).
multilevel <- mutate(
  multilevel,
  PCI_district_center =
    (PCI_districts - mean(PCI_districts, na.rm = TRUE)) /
      (2 * sd(PCI_districts, na.rm = TRUE))
)

## female literacy rate: continuous -> scaled
# Same transformation: mean-centered, divided by two standard deviations.
multilevel <- mutate(
  multilevel,
  literacy_rate_female_center =
    (literacy_rate_female - mean(literacy_rate_female, na.rm = TRUE)) /
      (2 * sd(literacy_rate_female, na.rm = TRUE))
)





```




```{r lmer analysis wealth (quintiles computed for each district) }


# csmoke, wealth, individual-level predictors only.
# Linear mixed model of csmoke_dbl on the age-group and wealth-quintile
# indicators plus urban_center, with a random intercept per ex_d_name_ind.
# agr_15_19 and wq_1 are left out of the formula (reference categories).
csmoke_all_lmer <- lmer(
  csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_csmoke_1 <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_1, "male_DLHS_4_AHS_csmoke_1.csv")



# csmoke, wealth x district median asset index.
# Adds medianai_center and its interactions with wq_2..wq_5 to the
# individual-level model; random intercept per ex_d_name_ind.
csmoke_all_lmer <- lmer(
  csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center +
    medianai_center +
    medianai_center:wq_2 + medianai_center:wq_3 +
    medianai_center:wq_4 + medianai_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_csmoke_medi <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_medi, "male_DLHS_4_AHS_csmoke_medianai_center.csv")


# csmoke, wealth x PCI.
# Adds PCI_district_center and its interactions with wq_2..wq_5 to the
# individual-level model; random intercept per ex_d_name_ind.
csmoke_all_lmer <- lmer(
  csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center +
    PCI_district_center +
    PCI_district_center:wq_2 + PCI_district_center:wq_3 +
    PCI_district_center:wq_4 + PCI_district_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_csmoke_pci <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_pci, "male_DLHS_4_AHS_csmoke_pci.csv")




## csmoke, wealth x district educational attainment.
# Adds ed_att_new_center and its interactions with wq_2..wq_5 to the
# individual-level model; random intercept per ex_d_name_ind.
csmoke_all_lmer <- lmer(
  csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center +
    ed_att_new_center +
    ed_att_new_center:wq_2 + ed_att_new_center:wq_3 +
    ed_att_new_center:wq_4 + ed_att_new_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_csmoke_ed_att_new <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_ed_att_new, "male_DLHS_4_AHS_csmoke_ed_att_new.csv")



## csmoke, wealth x district urban proportion.
# Adds urban_prop_center and its interactions with wq_2..wq_5 to the
# individual-level model; random intercept per ex_d_name_ind.
csmoke_all_lmer <- lmer(
  csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center +
    urban_prop_center +
    urban_prop_center:wq_2 + urban_prop_center:wq_3 +
    urban_prop_center:wq_4 + urban_prop_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_csmoke_urban_prop <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_urban_prop, "male_DLHS_4_AHS_csmoke_urban_prop.csv")



# csmoke, wealth x district female literacy rate.
# Adds literacy_rate_female_center and its interactions with wq_2..wq_5 to
# the individual-level model; random intercept per ex_d_name_ind.
csmoke_all_lmer <- lmer(
  csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center +
    literacy_rate_female_center +
    literacy_rate_female_center:wq_2 + literacy_rate_female_center:wq_3 +
    literacy_rate_female_center:wq_4 + literacy_rate_female_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_csmoke_literacy_rate_female <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_literacy_rate_female, "male_DLHS_4_AHS_csmoke_literacy_rate_female.csv")



```



```{r lmer analysis education}


# csmoke, education, individual-level predictors only.
# Linear mixed model of csmoke_dbl on the age-group and education indicators
# plus urban_center, with a random intercept per ex_d_name_ind.
# agr_15_19 and ed_1_o are left out of the formula (reference categories).
csmoke_all_lmer <- lmer(
  csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_csmoke_1 <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_1, "male_ed_DLHS_4_AHS_csmoke_1.csv")


# csmoke, education x district median asset index.
# Adds medianai_center and its interactions with ed_3_o..ed_6_o to the
# individual-level model; random intercept per ex_d_name_ind.
csmoke_all_lmer <- lmer(
  csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center +
    medianai_center +
    medianai_center:ed_3_o + medianai_center:ed_4_o +
    medianai_center:ed_5_o + medianai_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_csmoke_medi <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_medi, "male_ed_DLHS_4_AHS_csmoke_medianai_center.csv")


# csmoke, education x PCI.
# Adds PCI_district_center and its interactions with ed_3_o..ed_6_o to the
# individual-level model; random intercept per ex_d_name_ind.
csmoke_all_lmer <- lmer(
  csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center +
    PCI_district_center +
    PCI_district_center:ed_3_o + PCI_district_center:ed_4_o +
    PCI_district_center:ed_5_o + PCI_district_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_csmoke_pci <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_pci, "male_ed_DLHS_4_AHS_csmoke_pci.csv")



## csmoke, education x district educational attainment.
# Adds ed_att_new_center and its interactions with ed_3_o..ed_6_o to the
# individual-level model; random intercept per ex_d_name_ind.
csmoke_all_lmer <- lmer(
  csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center +
    ed_att_new_center +
    ed_att_new_center:ed_3_o + ed_att_new_center:ed_4_o +
    ed_att_new_center:ed_5_o + ed_att_new_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_csmoke_ed_att_new <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_ed_att_new, "male_ed_DLHS_4_AHS_csmoke_ed_att_new.csv")



## csmoke, education x district urban proportion.
# Adds urban_prop_center and its interactions with ed_3_o..ed_6_o to the
# individual-level model; random intercept per ex_d_name_ind.
# (The stray doubled `+` of the earlier formula is removed; the set and
# order of terms are unchanged.)
csmoke_all_lmer <- lmer(
  csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center +
    urban_prop_center +
    urban_prop_center:ed_3_o + urban_prop_center:ed_4_o +
    urban_prop_center:ed_5_o + urban_prop_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_csmoke_urban_prop <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_urban_prop, "male_ed_DLHS_4_AHS_csmoke_urban_prop.csv")





# csmoke, education x district female literacy rate.
# Adds literacy_rate_female_center and its interactions with ed_3_o..ed_6_o
# to the individual-level model; random intercept per ex_d_name_ind.
csmoke_all_lmer <- lmer(
  csmoke_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 + agr_35_39 + agr_40_44 +
    agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 + agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center +
    literacy_rate_female_center +
    literacy_rate_female_center:ed_3_o + literacy_rate_female_center:ed_4_o +
    literacy_rate_female_center:ed_5_o + literacy_rate_female_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_csmoke_all <- csmoke_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_csmoke_literacy_rate_female <- mutate(
  coeffs_csmoke_all,
  p.value = coef(summary(csmoke_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_csmoke_literacy_rate_female, "male_ed_DLHS_4_AHS_csmoke_literacy_rate_female.csv")




```


















# Multilevel Model: Wealth & Education interaction: Diabetes unfasted

```{r select variables for multilevel modeling }

# Build the modelling data set: keep the district identifier, the outcome
# variables, the individual-level covariates and the district-level
# covariates. No row filter is applied here.
multilevel <- India_DLHS_AHS %>%
  dplyr::select(
    ex_d_name_ind,
    ex_diab_narrow_ind_dbl, ex_diab_narrow_ind_ahs_unfasted_dbl,
    ex_htn_narrow_ind_dbl, bmigrt27.5_dbl, csmoke_dbl,
    sex, age_grp, urban, ed_5,
    hh_wealth_quintile_district, hh_wealth_quintile_district_c,
    asset_index_combined, wealth_quintile_rurb,
    urban_prop, rural_prop, PCI_districts, literacy_rate_female,
    ed_att_new, medianai
  )


library(broom.mixed) #to extract coefficents (for mixed models)
library(lme4)
library(lmerTest)
```

```{r convert categorical varibales with >1 level in binary variables& perform Grand Mean Centering for continuous varibales}


##################### individual-level variables
# age_group:
# One 0/1 indicator per age band (1 = record is in the band, 0 = it is in
# another band, NA = age_grp is missing). The comparison already yields NA
# for a missing age_grp, so no separate is.na() guard is needed, and the
# reassignable shorthand `T` is avoided.
multilevel <- mutate(
  multilevel,
  agr_15_19 = as.numeric(age_grp == "15-19"),
  agr_20_24 = as.numeric(age_grp == "20-24"),
  agr_25_29 = as.numeric(age_grp == "25-29"),
  agr_30_34 = as.numeric(age_grp == "30-34"),
  agr_35_39 = as.numeric(age_grp == "35-39"),
  agr_40_44 = as.numeric(age_grp == "40-44"),
  agr_45_49 = as.numeric(age_grp == "45-49"),
  agr_50_54 = as.numeric(age_grp == "50-54"),
  agr_55_59 = as.numeric(age_grp == "55-59"),
  agr_60_64 = as.numeric(age_grp == "60-64"),
  agr_65 = as.numeric(age_grp == ">65")
)


# district household_wealth_quintile:
# One 0/1 indicator per value (1-5) of hh_wealth_quintile_district; NA where
# the quintile is missing. The comparison propagates NA by itself, so the
# is.na() guard and the reassignable `T` are not needed.
multilevel <- mutate(
  multilevel,
  wq_1 = as.numeric(hh_wealth_quintile_district == 1),
  wq_2 = as.numeric(hh_wealth_quintile_district == 2),
  wq_3 = as.numeric(hh_wealth_quintile_district == 3),
  wq_4 = as.numeric(hh_wealth_quintile_district == 4),
  wq_5 = as.numeric(hh_wealth_quintile_district == 5)
)




# education
# 0/1 indicators for ed_5 values 1, 3, 4, 5 and 6; NA where ed_5 is missing.
# The comparison propagates NA by itself, so the is.na() guard and the
# reassignable `T` are not needed.
# NOTE(review): no indicator is created for ed_5 == 2 -- confirm that this
# code does not occur in ed_5.
multilevel <- mutate(
  multilevel,
  ed_1_o = as.numeric(ed_5 == 1),
  ed_3_o = as.numeric(ed_5 == 3),
  ed_4_o = as.numeric(ed_5 == 4),
  ed_5_o = as.numeric(ed_5 == 5),
  ed_6_o = as.numeric(ed_5 == 6)
)



# sex and urban
# Despite the `_center` suffix, both columns are unmodified copies of the
# source variables; no centering or scaling is applied to them.
multilevel <- multilevel %>%
  mutate(
    sex_center = sex,
    urban_center = urban
  )





############### district-level variables
# Each continuous covariate is centered on its mean and divided by twice its
# standard deviation. Mean and SD are taken over all rows currently in
# `multilevel`, ignoring missing values.
multilevel <- multilevel %>%
  mutate(
    # district wealth
    medianai_center =
      (medianai - mean(medianai, na.rm = TRUE)) /
        (2 * sd(medianai, na.rm = TRUE)),
    # urban_prop
    urban_prop_center =
      (urban_prop - mean(urban_prop, na.rm = TRUE)) /
        (2 * sd(urban_prop, na.rm = TRUE)),
    # educational attainment
    ed_att_new_center =
      (ed_att_new - mean(ed_att_new, na.rm = TRUE)) /
        (2 * sd(ed_att_new, na.rm = TRUE))
  )



## PCI_districts: continuous -> scaled
# Coerce PCI_districts to numeric before scaling. A factor has to go through
# as.character() first, because as.numeric() on a factor returns the internal
# level codes rather than the recorded values.
multilevel$PCI_districts <- if (is.factor(multilevel$PCI_districts)) {
  as.numeric(as.character(multilevel$PCI_districts))
} else {
  as.numeric(multilevel$PCI_districts)
}
# Center on the mean and divide by two standard deviations (NA ignored).
multilevel <- mutate(
  multilevel,
  PCI_district_center =
    (PCI_districts - mean(PCI_districts, na.rm = TRUE)) /
      (2 * sd(PCI_districts, na.rm = TRUE))
)

## female literacy rate: continuous -> scaled
# Same transformation: mean-centered, divided by two standard deviations.
multilevel <- mutate(
  multilevel,
  literacy_rate_female_center =
    (literacy_rate_female - mean(literacy_rate_female, na.rm = TRUE)) /
      (2 * sd(literacy_rate_female, na.rm = TRUE))
)





```

```{r lmer analysis wealth (quintiles computed for each district) }



# Diabetes (unfasted), wealth, individual-level predictors only.
# Linear mixed model of ex_diab_narrow_ind_ahs_unfasted_dbl on the age-group
# and wealth-quintile indicators plus urban_center and sex_center, with a
# random intercept per ex_d_name_ind. agr_15_19 and wq_1 are left out of the
# formula (reference categories).
# The earlier formula listed agr_55_59, agr_60_64 and agr_65 twice; a
# repeated term is only used once by R, so each is written once here.
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_ahs_unfasted_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 +
    agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_diab_1 <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_1, "DLHS_diab_1_ahs_unfasted.csv")

# Diabetes (unfasted), wealth x district median asset index.
# Adds medianai_center and its interactions with wq_2..wq_5 to the
# individual-level model; random intercept per ex_d_name_ind.
# The earlier formula listed agr_55_59, agr_60_64 and agr_65 twice; a
# repeated term is only used once by R, so each is written once here.
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_ahs_unfasted_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 +
    agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    medianai_center +
    medianai_center:wq_2 + medianai_center:wq_3 +
    medianai_center:wq_4 + medianai_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_diab_medi <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_medi, "DLHS_diab_medianai_center_ahs_unfasted.csv")



# Diabetes (unfasted), wealth x PCI.
# Adds PCI_district_center and its interactions with wq_2..wq_5 to the
# individual-level model; random intercept per ex_d_name_ind.
# The earlier formula listed agr_55_59, agr_60_64 and agr_65 twice; a
# repeated term is only used once by R, so each is written once here.
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_ahs_unfasted_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 +
    agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:wq_2 + PCI_district_center:wq_3 +
    PCI_district_center:wq_4 + PCI_district_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_diab_pci <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_pci, "DLHS_diab_pci_ahs_unfasted.csv")





## Diabetes (unfasted), wealth x district educational attainment.
# Adds ed_att_new_center and its interactions with wq_2..wq_5 to the
# individual-level model; random intercept per ex_d_name_ind.
# The earlier formula listed agr_55_59, agr_60_64 and agr_65 twice; a
# repeated term is only used once by R, so each is written once here.
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_ahs_unfasted_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 +
    agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:wq_2 + ed_att_new_center:wq_3 +
    ed_att_new_center:wq_4 + ed_att_new_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_diab_ed_att_new <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_ed_att_new, "DLHS_diab_ed_att_new_ahs_unfasted.csv")




## Diabetes (unfasted), wealth x district urban proportion.
# Adds urban_prop_center and its interactions with wq_2..wq_5 to the
# individual-level model; random intercept per ex_d_name_ind.
# The earlier formula listed agr_55_59, agr_60_64 and agr_65 twice; a
# repeated term is only used once by R, so each is written once here.
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_ahs_unfasted_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 +
    agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:wq_2 + urban_prop_center:wq_3 +
    urban_prop_center:wq_4 + urban_prop_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_diab_urban_prop <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_urban_prop, "DLHS_diab_urban_prop_ahs_unfasted.csv")




# Diabetes (unfasted), wealth x district female literacy rate.
# Adds literacy_rate_female_center and its interactions with wq_2..wq_5 to
# the individual-level model; random intercept per ex_d_name_ind.
# The earlier formula listed agr_55_59, agr_60_64 and agr_65 twice; a
# repeated term is only used once by R, so each is written once here.
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_ahs_unfasted_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 +
    agr_65 +
    wq_2 + wq_3 + wq_4 + wq_5 + urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:wq_2 + literacy_rate_female_center:wq_3 +
    literacy_rate_female_center:wq_4 + literacy_rate_female_center:wq_5 +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_diab_literacy_rate_female <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_literacy_rate_female, "DLHS_diab_literacy_rate_female_ahs_unfasted.csv")


```
```{r lmer analysis education }

# Diabetes (unfasted), education, individual-level predictors only.
# Linear mixed model of ex_diab_narrow_ind_ahs_unfasted_dbl on the age-group
# and education indicators plus urban_center and sex_center, with a random
# intercept per ex_d_name_ind. agr_15_19 and ed_1_o are left out of the
# formula (reference categories).
# The earlier formula listed agr_55_59, agr_60_64 and agr_65 twice; a
# repeated term is only used once by R, so each is written once here.
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_ahs_unfasted_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 +
    agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_diab_1 <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_1, "ed_DLHS_diab_1_ahs_unfasted.csv")





# Diabetes (unfasted), education x district median asset index.
# Adds medianai_center and its interactions with ed_3_o..ed_6_o to the
# individual-level model; random intercept per ex_d_name_ind.
# The earlier formula listed agr_55_59, agr_60_64 and agr_65 twice; a
# repeated term is only used once by R, so each is written once here.
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_ahs_unfasted_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 +
    agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    medianai_center +
    medianai_center:ed_3_o + medianai_center:ed_4_o +
    medianai_center:ed_5_o + medianai_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_diab_medi <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_medi, "ed_DLHS_diab_medianai_center_ahs_unfasted.csv")



# Diabetes (unfasted), education x PCI.
# Adds PCI_district_center and its interactions with ed_3_o..ed_6_o to the
# individual-level model; random intercept per ex_d_name_ind.
# The earlier formula listed agr_55_59, agr_60_64 and agr_65 twice; a
# repeated term is only used once by R, so each is written once here.
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_ahs_unfasted_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 +
    agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    PCI_district_center +
    PCI_district_center:ed_3_o + PCI_district_center:ed_4_o +
    PCI_district_center:ed_5_o + PCI_district_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_diab_pci <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_pci, "ed_DLHS_diab_pci_ahs_unfasted.csv")




## Diabetes (unfasted), education x district educational attainment.
# Adds ed_att_new_center and its interactions with ed_3_o..ed_6_o to the
# individual-level model; random intercept per ex_d_name_ind.
# The earlier formula listed agr_55_59, agr_60_64 and agr_65 twice; a
# repeated term is only used once by R, so each is written once here.
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_ahs_unfasted_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 +
    agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    ed_att_new_center +
    ed_att_new_center:ed_3_o + ed_att_new_center:ed_4_o +
    ed_att_new_center:ed_5_o + ed_att_new_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_diab_ed_att_new <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_ed_att_new, "ed_DLHS_diab_ed_att_new_ahs_unfasted.csv")




## Diabetes (unfasted), education x district urban proportion.
# Adds urban_prop_center and its interactions with ed_3_o..ed_6_o to the
# individual-level model; random intercept per ex_d_name_ind.
# The earlier formula listed agr_55_59, agr_60_64 and agr_65 twice; a
# repeated term is only used once by R, so each is written once here.
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_ahs_unfasted_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 +
    agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    urban_prop_center +
    urban_prop_center:ed_3_o + urban_prop_center:ed_4_o +
    urban_prop_center:ed_5_o + urban_prop_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_diab_urban_prop <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_urban_prop, "ed_DLHS_diab_urban_prop_ahs_unfasted.csv")




# Diabetes (unfasted), education x district female literacy rate.
# Adds literacy_rate_female_center and its interactions with ed_3_o..ed_6_o
# to the individual-level model; random intercept per ex_d_name_ind.
# The earlier formula listed agr_55_59, agr_60_64 and agr_65 twice; a
# repeated term is only used once by R, so each is written once here.
diab_all_lmer <- lmer(
  ex_diab_narrow_ind_ahs_unfasted_dbl ~ agr_20_24 + agr_25_29 + agr_30_34 +
    agr_35_39 + agr_40_44 + agr_45_49 + agr_50_54 + agr_55_59 + agr_60_64 +
    agr_65 +
    ed_3_o + ed_4_o + ed_5_o + ed_6_o + urban_center + sex_center +
    literacy_rate_female_center +
    literacy_rate_female_center:ed_3_o + literacy_rate_female_center:ed_4_o +
    literacy_rate_female_center:ed_5_o + literacy_rate_female_center:ed_6_o +
    (1 | ex_d_name_ind),
  data = multilevel
)

# Keep the fixed-effect rows by their `effect` label instead of a hard-coded
# row count, so the slice stays correct if lmer drops a term.
coeffs_diab_all <- diab_all_lmer %>%
  tidy(conf.int = TRUE, conf.method = "Wald") %>%
  dplyr::filter(effect == "fixed")
# Overwrite p.value with the p-values from the model summary (lmerTest),
# selected by column name rather than by position.
coeffs_diab_literacy_rate_female <- mutate(
  coeffs_diab_all,
  p.value = coef(summary(diab_all_lmer))[, "Pr(>|t|)"]
)

write.csv(coeffs_diab_literacy_rate_female, "ed_DLHS_diab_literacy_rate_female_ahs_unfasted.csv")




```
